diff --git a/common/include/PS2Etypes.h b/common/include/PS2Etypes.h index 705d22aa46..6f652e8736 100644 --- a/common/include/PS2Etypes.h +++ b/common/include/PS2Etypes.h @@ -55,8 +55,6 @@ // disable the default case in a switch #define jNO_DEFAULT \ { \ - break; \ - \ default: \ jASSUME(0); \ break; \ diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 8a71d245c4..2da21f9f11 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -20,11 +20,11 @@ #define __PCSX2CONFIG_H__ // Hack so that you can still use this file from C (not C++), or from a plugin without access to Paths.h. -#ifdef PLUGIN_ONLY +// .. and removed in favor of a less hackish approach (air) + +#ifndef g_MaxPath #define g_MaxPath 255 -#else -#include "Paths.h" - #endif +#endif ///////////////////////////////////////////////////////////////////////// // Session Configuration Override Flags @@ -44,7 +44,8 @@ extern SessionOverrideFlags g_Session; ////////////////////////////////////////////////////////////////////////// // Pcsx2 User Configuration Options! 
-//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) #define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs #define PCSX2_EEREC 0x10 #define PCSX2_VU0REC 0x20 diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 1461bb83bd..4daeee5663 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -29,6 +29,8 @@ #define PCSX2_VERSION "(beta)" +#include "System.h" + #include "Plugins.h" #include "SaveState.h" @@ -40,7 +42,4 @@ #include "Elfheader.h" #include "Patch.h" -#include "System.h" -#include "Pcsx2Config.h" - #endif /* __COMMON_H__ */ diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index a58ec1d0a3..b9b9a7c9ed 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -164,7 +164,7 @@ struct vSyncTimingInfo static vSyncTimingInfo vSyncInfo; -static __forceinline void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame ) +static void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame ) { // Important: Cannot use floats or doubles here. The emulator changes rounding modes // depending on user-set speedhack options, and it can break float/double code @@ -270,8 +270,6 @@ u32 UpdateVSyncRate() return (u32)m_iTicks; } -extern u32 vu0time; - void frameLimitReset() { m_iStart = GetCPUTicks(); @@ -282,13 +280,13 @@ void frameLimitReset() // See the GS FrameSkip function for details on why this is here and not in the GS. static __forceinline void frameLimit() { + if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return; + if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off... + s64 sDeltaTime; u64 uExpectedEnd; u64 iEnd; - if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return; - if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off... 
- uExpectedEnd = m_iStart + m_iTicks; iEnd = GetCPUTicks(); @@ -465,7 +463,7 @@ __forceinline bool rcntUpdate_vSync() return false; } -static __forceinline void __fastcall _cpuTestTarget( int i ) +static __forceinline void _cpuTestTarget( int i ) { if (counters[i].count < counters[i].target) return; @@ -538,7 +536,7 @@ __forceinline bool rcntUpdate() return retval; } -static void _rcntSetGate( int index ) +static __forceinline void _rcntSetGate( int index ) { if (counters[index].mode.EnableGate) { @@ -563,7 +561,7 @@ static void _rcntSetGate( int index ) } // mode - 0 means hblank source, 8 means vblank source. -void __fastcall rcntStartGate(bool isVblank, u32 sCycle) +__forceinline void rcntStartGate(bool isVblank, u32 sCycle) { int i; @@ -624,7 +622,7 @@ void __fastcall rcntStartGate(bool isVblank, u32 sCycle) } // mode - 0 means hblank signal, 8 means vblank signal. -void __fastcall rcntEndGate(bool isVblank , u32 sCycle) +__forceinline void rcntEndGate(bool isVblank , u32 sCycle) { int i; @@ -665,7 +663,7 @@ void __fastcall rcntEndGate(bool isVblank , u32 sCycle) // rcntUpdate, since we're being called from there anyway. 
} -void __fastcall rcntWmode(int index, u32 value) +__forceinline void rcntWmode(int index, u32 value) { if(counters[index].mode.IsCounting) { if(counters[index].mode.ClockSource != 0x3) { @@ -696,7 +694,7 @@ void __fastcall rcntWmode(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWcount(int index, u32 value) +__forceinline void rcntWcount(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeCount = %x, oldcount=%x, target=%x", index, value, counters[index].count, counters[index].target ); @@ -722,7 +720,7 @@ void __fastcall rcntWcount(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWtarget(int index, u32 value) +__forceinline void rcntWtarget(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeTarget = %x", index, value); @@ -738,13 +736,13 @@ void __fastcall rcntWtarget(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWhold(int index, u32 value) +__forceinline void rcntWhold(int index, u32 value) { EECNT_LOG("EE Counter[%d] Hold Write = %x", index, value); counters[index].hold = value; } -u32 __fastcall rcntRcount(int index) +__forceinline u32 rcntRcount(int index) { u32 ret; @@ -759,7 +757,7 @@ u32 __fastcall rcntRcount(int index) return ret; } -u32 __fastcall rcntCycle(int index) +__forceinline u32 rcntCycle(int index) { if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3)) return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate); diff --git a/pcsx2/Counters.h b/pcsx2/Counters.h index 6e20dda0c2..5333bcb111 100644 --- a/pcsx2/Counters.h +++ b/pcsx2/Counters.h @@ -139,14 +139,14 @@ extern bool rcntUpdate_vSync(); extern bool rcntUpdate(); extern void rcntInit(); -extern void __fastcall rcntStartGate(bool mode, u32 sCycle); -extern void __fastcall rcntEndGate(bool mode, u32 sCycle); -extern void __fastcall rcntWcount(int index, u32 value); -extern void __fastcall rcntWmode(int index, u32 value); -extern void __fastcall rcntWtarget(int index, 
u32 value); -extern void __fastcall rcntWhold(int index, u32 value); -extern u32 __fastcall rcntRcount(int index); -extern u32 __fastcall rcntCycle(int index); +extern void rcntStartGate(bool mode, u32 sCycle); +extern void rcntEndGate(bool mode, u32 sCycle); +extern void rcntWcount(int index, u32 value); +extern void rcntWmode(int index, u32 value); +extern void rcntWtarget(int index, u32 value); +extern void rcntWhold(int index, u32 value); +extern u32 rcntRcount(int index); +extern u32 rcntCycle(int index); u32 UpdateVSyncRate(); void frameLimitReset(); diff --git a/pcsx2/DebugTools/Debug.h b/pcsx2/DebugTools/Debug.h index 1e42cea714..5abcd4f2d0 100644 --- a/pcsx2/DebugTools/Debug.h +++ b/pcsx2/DebugTools/Debug.h @@ -190,6 +190,8 @@ extern bool SrcLog_GPU( const char* fmt, ... ); #define MEMCARDS_LOG 0&& #endif +//#define VIFUNPACKDEBUG //enable unpack debugging output + #ifdef VIFUNPACKDEBUG #define VIFUNPACK_LOG VIF_LOG #else diff --git a/pcsx2/Exceptions.h b/pcsx2/Exceptions.h index c15ffb5acb..640e61c0e1 100644 --- a/pcsx2/Exceptions.h +++ b/pcsx2/Exceptions.h @@ -16,11 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef _PCSX2_EXCEPTIONS_H_ -#define _PCSX2_EXCEPTIONS_H_ - -#include -#include "StringUtils.h" +#pragma once // This class provides an easy and clean method for ensuring objects are not copyable. class NoncopyableObject @@ -380,5 +376,3 @@ namespace Exception {} }; } - -#endif diff --git a/pcsx2/PrecompiledHeader.h b/pcsx2/PrecompiledHeader.h index 84c3e977d2..e2b24c72e3 100644 --- a/pcsx2/PrecompiledHeader.h +++ b/pcsx2/PrecompiledHeader.h @@ -33,6 +33,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // Include the STL junk that's actually handy. 
+#include #include #include #include @@ -69,7 +70,9 @@ typedef int BOOL; #include "zlib/zlib.h" #include "PS2Etypes.h" +#include "MemcpyFast.h" #include "StringUtils.h" +#include "Exceptions.h" //////////////////////////////////////////////////////////////////// // Compiler/OS specific macros and defines -- Begin Section @@ -155,24 +158,3 @@ static __forceinline u32 timeGetTime() # define __releaseinline __forceinline #endif -////////////////////////////////////////////////////////////////////////////////////////// -// Emitter Instance Identifiers. If you add a new emitter, do it here also. -// Note: Currently most of the instances map back to 0, since existing dynarec code all -// shares iCore and must therefore all share the same emitter instance. -// (note: these don't really belong here per-se, but it's an easy spot to use for now) -enum -{ - EmitterId_R5900 = 0, - EmitterId_R3000a = EmitterId_R5900, - EmitterId_VU0micro = EmitterId_R5900, - EmitterId_VU1micro = EmitterId_R5900, - - // Cotton's new microVU, which is iCore-free - EmitterId_microVU0, - EmitterId_microVU1, - - // Air's eventual IopRec, which will also be iCore-free - EmitterId_R3000air, - - EmitterId_Count // must always be last! 
-}; diff --git a/pcsx2/R3000A.cpp b/pcsx2/R3000A.cpp index 50931387a3..c821733549 100644 --- a/pcsx2/R3000A.cpp +++ b/pcsx2/R3000A.cpp @@ -224,7 +224,7 @@ static __forceinline void _psxTestInterrupts() } } -void psxBranchTest() +__releaseinline void psxBranchTest() { if( psxTestCycle( psxNextsCounter, psxNextCounter ) ) { diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index 7ab13663e8..9a93b851c1 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -200,7 +200,7 @@ extern R3000Acpu psxRec; void psxReset(); void psxShutdown(); void psxException(u32 code, u32 step); -void psxBranchTest(); +extern void psxBranchTest(); void psxExecuteBios(); void psxMemReset(); diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index b19d06dd0b..35e7dda1fe 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -106,7 +106,7 @@ void cpuShutdown() disR5900FreeSyms(); } -__releaseinline void __fastcall cpuException(u32 code, u32 bd) +__releaseinline void cpuException(u32 code, u32 bd) { cpuRegs.branch = 0; // Tells the interpreter that an exception occurred during a branch. bool errLevel2, checkStatus; @@ -244,7 +244,7 @@ void cpuTestMissingHwInts() { } // sets a branch test to occur some time from an arbitrary starting point. -__forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ) +__forceinline void cpuSetNextBranch( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. 
@@ -252,20 +252,18 @@ __forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ) if( (int)(g_nextBranchCycle - startCycle) > delta ) { g_nextBranchCycle = startCycle + delta; - return 1; } - return 0; } // sets a branch to occur some time from the current cycle -__forceinline int __fastcall cpuSetNextBranchDelta( s32 delta ) +__forceinline void cpuSetNextBranchDelta( s32 delta ) { - return cpuSetNextBranch( cpuRegs.cycle, delta ); + cpuSetNextBranch( cpuRegs.cycle, delta ); } // tests the cpu cycle agaisnt the given start and delta values. // Returns true if the delta time has passed. -__forceinline int __fastcall cpuTestCycle( u32 startCycle, s32 delta ) +__forceinline int cpuTestCycle( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't explode // if the startCycle is ahead of our current cpu cycle. @@ -279,7 +277,7 @@ __forceinline void cpuSetBranch() g_nextBranchCycle = cpuRegs.cycle; } -void cpuClearInt( uint i ) +__forceinline void cpuClearInt( uint i ) { jASSUME( i < 32 ); cpuRegs.interrupt &= ~(1 << i); diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index b2dd4db46f..cb482ee6bf 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -257,14 +257,14 @@ extern void cpuInit(); extern void cpuReset(); // can throw Exception::FileNotFound. 
extern void cpuShutdown(); extern void cpuExecuteBios(); -extern void __fastcall cpuException(u32 code, u32 bd); +extern void cpuException(u32 code, u32 bd); extern void cpuTlbMissR(u32 addr, u32 bd); extern void cpuTlbMissW(u32 addr, u32 bd); extern void cpuTestHwInts(); -extern int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ); -extern int __fastcall cpuSetNextBranchDelta( s32 delta ); -extern int __fastcall cpuTestCycle( u32 startCycle, s32 delta ); +extern void cpuSetNextBranch( u32 startCycle, s32 delta ); +extern void cpuSetNextBranchDelta( s32 delta ); +extern int cpuTestCycle( u32 startCycle, s32 delta ); extern void cpuSetBranch(); extern bool _cpuBranchTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900: diff --git a/pcsx2/R5900OpcodeTables.h b/pcsx2/R5900OpcodeTables.h index 8f4d956848..cd2a5e499c 100644 --- a/pcsx2/R5900OpcodeTables.h +++ b/pcsx2/R5900OpcodeTables.h @@ -18,8 +18,6 @@ #ifndef _R5900_OPCODETABLES_H #define _R5900_OPCODETABLES_H -#include - #include "PS2Etypes.h" // TODO : Move these into the OpcodeTables namespace diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index b40fdbccbd..45c3274a2a 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -451,16 +451,16 @@ void SIO_CommandWrite(u8 value,int way) { break; case 0x21: // Set pad slot. - sio.mtapst = 0x21; + sio.mtapst = value; sio.bufcount = 6; // No idea why this is 6, saved from old code. break; case 0x22: // Set memcard slot. - sio.mtapst = 0x22; + sio.mtapst = value; sio.bufcount = 6; // No idea why this is 6, saved from old code. break; } - // Commented out values are from original code. Break multitap in bios. + // Commented out values are from original code. They break multitap in bios. sio.buf[sio.bufcount-1]=0;//'+'; sio.buf[sio.bufcount]=0;//'Z'; return; @@ -554,6 +554,7 @@ void InitializeSIO(u8 value) int port = sio.GetMultitapPort(); if (!IsMtapPresent(port)) { + // If "unplug" multitap mid game, set active slots to 0. 
sio.activePadSlot[port] = 0; sio.activeMemcardSlot[port] = 0; } diff --git a/pcsx2/System.h b/pcsx2/System.h index 80c7516749..09dff0196c 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -20,9 +20,9 @@ #define __SYSTEM_H__ #include "PS2Etypes.h" +#include "Paths.h" #include "Pcsx2Config.h" #include "Exceptions.h" -#include "Paths.h" #include "MemcpyFast.h" #include "SafeArray.h" #include "Misc.h" diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index e043760350..299803d2d6 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2508,13 +2508,23 @@ void _vuRegsMTIR(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; VUregsn->VFwrite = 0; VUregsn->VFread0 = _Fs_; - VUregsn->VFr0xyzw= _XYZW; + VUregsn->VFr0xyzw= 1 << (3-_Fsf_); VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _Ft_; VUregsn->VIread = GET_VF0_FLAG(_Fs_); } -VUREGS_FTFS(MR32); +void _vuRegsMR32(VURegs * VU, _VURegsNum *VUregsn) { + VUregsn->pipe = VUPIPE_FMAC; + VUregsn->VFwrite = _Ft_; + VUregsn->VFwxyzw = _XYZW; + VUregsn->VFread0 = _Fs_; + VUregsn->VFr0xyzw= (_XYZW >> 1) | ((_XYZW << 3) & 0xf); //rotate + VUregsn->VFread1 = 0; + VUregsn->VFr1xyzw = 0xff; + VUregsn->VIwrite = 0; + VUregsn->VIread = (_Ft_ ? GET_VF0_FLAG(_Fs_) : 0); +} void _vuRegsLQ(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index a2ebe6f961..cb81e5f6b2 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -25,10 +25,10 @@ #include "Vif.h" #include "VifDma.h" -VIFregisters *_vifRegs; -u32* _vifRow = NULL, *_vifCol = NULL; -u32* _vifMaskRegs = NULL; -vifStruct *_vif; +VIFregisters *vifRegs; +u32* vifRow = NULL, *vifCol = NULL; +u32* vifMaskRegs = NULL; +vifStruct *vif; PCSX2_ALIGNED16(u32 g_vifRow0[4]); PCSX2_ALIGNED16(u32 g_vifCol0[4]); @@ -44,35 +44,37 @@ enum UnpackOffset OFFSET_X = 0, OFFSET_Y = 1, OFFSET_Z = 2, - OFFSET_W =3 + OFFSET_W = 3 }; -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) - __forceinline static int _limit(int a, int max) { return (a > max) ? 
max : a; } -static __releaseinline void writeX(u32 &dest, u32 data) +static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { int n; + u32 vifRowReg = getVifRowRegs(offnum); - if (_vifRegs->code & 0x10000000) + if (vifRegs->code & 0x10000000) { - switch (_vif->cl) + switch (vif->cl) { case 0: - n = (_vifRegs->mask) & 0x3; + if (offnum == OFFSET_X) + n = (vifRegs->mask) & 0x3; + else + n = (vifRegs->mask >> (offnum * 2)) & 0x3; break; case 1: - n = (_vifRegs->mask >> 8) & 0x3; + n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; case 2: - n = (_vifRegs->mask >> 16) & 0x3; + n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; default: - n = (_vifRegs->mask >> 24) & 0x3; + n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; } } @@ -81,355 +83,144 @@ static __releaseinline void writeX(u32 &dest, u32 data) switch (n) { case 0: - if ((_vif->cmd & 0x6F) == 0x6f) + if ((vif->cmd & 0x6F) == 0x6f) { dest = data; } - else if (_vifRegs->mode == 1) + else switch (vifRegs->mode) { - dest = data + _vifRegs->r0; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r0 += data; - dest = _vifRegs->r0; - } - else - { - dest = data; + case 1: + dest = data + vifRowReg; + break; + case 2: + // vifRowReg isn't used after this, or I would make it equal to dest here. + dest = setVifRowRegs(offnum, vifRowReg + data); + break; + default: + dest = data; + break; } break; case 1: - dest = _vifRegs->r0; + dest = vifRowReg; break; case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } + dest = getVifColRegs((vif->cl > 2) ? 
3 : vif->cl); + break; + case 3: break; } -// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); -} - -static __releaseinline void writeY(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 2) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 10) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 18) & 0x3; - break; - default: - n = (_vifRegs->mask >> 26) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r1; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r1 += data; - dest = _vifRegs->r1; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r1; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeY %8.8x : Mode %d, r1 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r1,data); -} - -static __releaseinline void writeZ(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 4) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 12) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 20) & 0x3; - break; - default: - n = (_vifRegs->mask >> 28) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r2; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r2 += data; - dest = _vifRegs->r2; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r2; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: 
- dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeZ %8.8x : Mode %d, r2 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r2,data); -} - -static __releaseinline void writeW(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 6) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 14) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 22) & 0x3; - break; - default: - n = (_vifRegs->mask >> 30) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r3; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r3 += data; - dest = _vifRegs->r3; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r3; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeW %8.8x : Mode %d, r3 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r3,data); -} - -template -static void _UNPACKpart(u32 offnum, u32 &x, T y) -{ - if (_vifRegs->offset == offnum) - { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } - _vifRegs->offset++; - } -} - -template -static void _UNPACKpart(u32 offnum, u32 &x, T y, int &size) -{ - if (_vifRegs->offset == offnum) - { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } - size--; - _vifRegs->offset++; - } +// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", 
*dest,vifRegs->mode,vifRegs->r0,data); } template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data, size); - _UNPACKpart(OFFSET_Y, *dest++, *data, size); - _UNPACKpart(OFFSET_Z, *dest++, *data, size); - _UNPACKpart(OFFSET_W, *dest , *data, size); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + //S-# will always be a complete packet, no matter what. So we can skip the offset bits + writeXYZW(OFFSET_X, *dest++, *data); + writeXYZW(OFFSET_Y, *dest++, *data); + writeXYZW(OFFSET_Z, *dest++, *data); + writeXYZW(OFFSET_W, *dest , *data); } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data--, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++); - _UNPACKpart(OFFSET_W, *dest , *data); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + if (vifRegs->offset == OFFSET_X) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; + size--; + } + } + + if (vifRegs->offset == OFFSET_Y) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data); + vifRegs->offset = OFFSET_Z; + size--; + } + } + + if (vifRegs->offset == OFFSET_Z) + { + writeXYZW(vifRegs->offset, *dest++, *dest-2); + vifRegs->offset = OFFSET_W; + } + + if (vifRegs->offset == OFFSET_W) + { + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; + } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data++, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++, size); - _UNPACKpart(OFFSET_W, *dest, *data); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + if(vifRegs->offset == OFFSET_X) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; + size--; + } + } + + if(vifRegs->offset == OFFSET_Y) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, 
*data++); + vifRegs->offset = OFFSET_Z; + size--; + } + } + + if(vifRegs->offset == OFFSET_Z) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_W; + size--; + } + } + + if(vifRegs->offset == OFFSET_W) + { + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) + //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; + } } template void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data++, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++, size); - _UNPACKpart(OFFSET_W, *dest , *data, size); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset++; + size--; + } + + if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3), size); - _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2), size); - _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7), size); - _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8), size); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + //As with S-#, this will always be a complete packet + writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); + writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); + writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); + writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) @@ -599,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer() return ret; } -static __forceinline int mfifoVIF1chain() +static __forceinline int mfifo_VIF1chain() { int ret; @@ -739,7 +530,7 @@ void vifMFIFOInterrupt() { g_vifCycles = 0; - if (vif1.inprogress == 1) 
mfifoVIF1chain(); + if (vif1.inprogress == 1) mfifo_VIF1chain(); if (vif1.irq && vif1.tag.size == 0) { diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 4ad1cb233e..e01cb32bd8 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -24,6 +24,7 @@ struct vifCycle { u8 pad[2]; }; +// r0-r3 and c0-c3 would be more managable as arrays. struct VIFregisters { u32 stat; u32 pad0[3]; @@ -80,14 +81,97 @@ struct VIFregisters { extern "C" { // these use cdecl for Asm code references. - extern VIFregisters *_vifRegs; - extern u32* _vifMaskRegs; - extern u32* _vifRow; + extern VIFregisters *vifRegs; + extern u32* vifMaskRegs; + extern u32* vifRow; extern u32* _vifCol; } +static __forceinline u32 setVifRowRegs(u32 reg, u32 data) +{ + switch (reg) + { + case 0: + vifRegs->r0 = data; + break; + case 1: + vifRegs->r1 = data; + break; + case 2: + vifRegs->r2 = data; + break; + case 3: + vifRegs->r3 = data; + break; + jNO_DEFAULT; + } + return data; +} + +static __forceinline u32 getVifRowRegs(u32 reg) +{ + switch (reg) + { + case 0: + return vifRegs->r0; + break; + case 1: + return vifRegs->r1; + break; + case 2: + return vifRegs->r2; + break; + case 3: + return vifRegs->r3; + break; + jNO_DEFAULT; + } +} + +static __forceinline u32 setVifColRegs(u32 reg, u32 data) +{ + switch (reg) + { + case 0: + vifRegs->c0 = data; + break; + case 1: + vifRegs->c1 = data; + break; + case 2: + vifRegs->c2 = data; + break; + case 3: + vifRegs->c3 = data; + break; + jNO_DEFAULT; + } + return data; +} + +static __forceinline u32 getVifColRegs(u32 reg) +{ + switch (reg) + { + case 0: + return vifRegs->c0; + break; + case 1: + return vifRegs->c1; + break; + case 2: + return vifRegs->c2; + break; + case 3: + return vifRegs->c3; + break; + jNO_DEFAULT; + } +} + #define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) +#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) void dmaVIF0(); void dmaVIF1(); diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 02361285c2..4d466169cc 
100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -29,7 +29,7 @@ using namespace std; // for min / max -//#define VIFUNPACKDEBUG //enable unpack debugging output + #define gif ((DMACh*)&PS2MEM_HW[0xA000]) @@ -37,10 +37,10 @@ using namespace std; // for min / max extern "C" { // Need cdecl on these for ASM references. - extern VIFregisters *_vifRegs; - extern u32* _vifMaskRegs; - extern u32* _vifRow; - extern u32* _vifCol; + extern VIFregisters *vifRegs; + extern u32* vifMaskRegs; + extern u32* vifRow; + extern u32* vifCol; } PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]); @@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]); -extern vifStruct *_vif; +extern vifStruct *vif; vifStruct vif0, vif1; @@ -254,57 +254,45 @@ __forceinline static int _limit(int a, int max) static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum) { const VIFUnpackFuncTable *unpack; - vifStruct *vif; - VIFregisters *vifRegs; + unpack = &VIFfuncTable[ unpackType ]; - if (VIFdmanum == 0) - { - vif = &vif0; - vifRegs = vif0Regs; - } - else - { - vif = &vif1; - vifRegs = vif1Regs; - } - switch (unpackType) { case 0x0: - vif->tag.addr += size * 4; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-32 skip, size = %d", size); break; case 0x1: - vif->tag.addr += size * 8; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-16 skip, size = %d", size); break; case 0x2: - vif->tag.addr += size * 16; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-8 skip, size = %d", size); break; case 0x4: - vif->tag.addr += size + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-32 skip, size = %d", size); break; case 0x5: - vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-16 
skip, size = %d", size); break; case 0x6: - vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size); break; case 0x8: - vif->tag.addr += size + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-32 skip, size = %d", size); break; case 0x9: - vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size); break; case 0xA: - vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-8 skip, size = %d", size); break; case 0xC: @@ -312,15 +300,15 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFUNPACK_LOG("Processing V4-32 skip, size = %d, CL = %d, WL = %d", size, vif1Regs->cycle.cl, vif1Regs->cycle.wl); break; case 0xD: - vif->tag.addr += size * 2; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-16 skip, size = %d", size); break; case 0xE: - vif->tag.addr += size * 4; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-8 skip, size = %d", size); break; case 0xF: - vif->tag.addr += size * 8; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-5 skip, size = %d", size); break; default: @@ -328,87 +316,59 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; } - if ((vif->tag.addr & 0xf) == unpack->gsize) + //Append any skips in to the equasion + + if (vifRegs->cycle.cl > vifRegs->cycle.wl) { - vif->tag.addr += 16 - unpack->gsize; + VIFUNPACK_LOG("Old addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); + vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16); + VIFUNPACK_LOG("New addr %x CL %x WL %x", 
vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); } + + //This is sorted out later + if((vif->tag.addr & 0xf) != (vifRegs->offset * 4)) + { + VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr); + vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4); + } + } -static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; UNPACKFUNCTYPE func; const VIFUnpackFuncTable *ft; - vifStruct *vif; - VIFregisters *vifRegs; VURegs * VU; u8 *cdata = (u8*)data; + #ifdef _DEBUG u32 memsize = VIFdmanum ? 0x4000 : 0x1000; #endif - _mm_prefetch((char*)data, _MM_HINT_NTA); - if (VIFdmanum == 0) { VU = &VU0; - vif = &vif0; - vifRegs = vif0Regs; assert(v->addr < memsize); } else { - VU = &VU1; - vif = &vif1; - vifRegs = vif1Regs; assert(v->addr < memsize); - - if (vu1MicroIsSkipping()) - { - // don't process since the frame is dummy - vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16); - return; - } } dest = (u32*)(VU->Mem + v->addr); - VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", - VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); - -#ifdef _DEBUG - if (v->size != size) - { - VIF_LOG("*PCSX2*: warning v->size != size"); - } - - if ((v->addr + size*4) > memsize) - { - Console::Notice("*PCSX2*: fixme unpack overflow"); - Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", - params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); - } -#endif + VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); // The unpack type unpackType = v->cmd & 0xf; - - if (size == 0) - { - VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! 
v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask); - } - - _mm_prefetch((char*)data + 128, _MM_HINT_NTA); - _vifRegs = (VIFregisters*)vifRegs; - _vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks; - _vif = vif; - _vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0; ft = &VIFfuncTable[ unpackType ]; - func = _vif->usn ? ft->funcU : ft->funcS; + func = vif->usn ? ft->funcU : ft->funcS; size <<= 2; @@ -416,56 +376,64 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma memsize = size; #endif - if (_vifRegs->offset > 0) - { - int destinc, unpacksize; + if(vif1Regs->offset != 0) + { + int unpacksize; + + //This is just to make sure the alignment isnt loopy on a split packet + if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) + { + DevCon::Error("Warning: Unpack alignment error"); + } VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); - // SSE doesn't handle such small data - if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); - - if (vifRegs->offset < (u32)ft->qsize) + if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) + VIFUNPACK_LOG("Warning! 
Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); + + if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) { - if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) - { - Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset)); - } + DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset)); + } unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset)); - } - else - { - unpacksize = 0; - Console::WriteLn("Unpack align offset = 0"); - } - destinc = (4 - ft->qsize) + unpacksize; + + VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset); + func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; - cdata += unpacksize * ft->dsize; - + vifRegs->num--; ++vif->cl; + if (vif->cl == vifRegs->cycle.wl) { if (vifRegs->cycle.cl != vifRegs->cycle.wl) - dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; + { + vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4; + //dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; + } else - dest += destinc; + { + vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4; + //dest += destinc; + } vif->cl = 0; + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); + return size >> 2; + } else { - dest += destinc; + vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4; + dest += (4 - ft->qsize) + unpacksize; + cdata += unpacksize * ft->dsize; + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); } - VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); - } - else if (v->size != (size >> 
2)) - ProcessMemSkip(size, unpackType, VIFdmanum); + - if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write + if (vif->cl != 0) //Check alignment for SSE unpacks { #ifdef _DEBUG @@ -474,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma int incdest; - if (vif->cl != 0) + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { // continuation from last stream @@ -491,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (vif->cl == vifRegs->cycle.wl) { dest += incdest; + vif->tag.addr += incdest * 4; vif->cl = 0; break; } dest += 4; + vif->tag.addr += 16; } - // have to update - _vifRow[0] = _vifRegs->r0; - _vifRow[1] = _vifRegs->r1; - _vifRow[2] = _vifRegs->r2; - _vifRow[3] = _vifRegs->r3; + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0; + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } } + } + return size>>2; +} - if ((size >= ft->gsize) && !(v->addr&0xf)) + +static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +{ + u32 *dest; + u32 unpackType; + UNPACKFUNCTYPE func; + const VIFUnpackFuncTable *ft; + VURegs * VU; + u8 *cdata = (u8*)data; + +#ifdef _DEBUG + u32 memsize = VIFdmanum ? 
0x4000 : 0x1000; +#endif + + _mm_prefetch((char*)data, _MM_HINT_NTA); + + if (VIFdmanum == 0) + { + VU = &VU0; + //vifRegs = vif0Regs; + assert(v->addr < memsize); + } + else + { + + VU = &VU1; + //vifRegs = vif1Regs; + assert(v->addr < memsize); + + if (vu1MicroIsSkipping()) + { + // don't process since the frame is dummy + vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16); + return; + } + } + + dest = (u32*)(VU->Mem + v->addr); + + VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); + + VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); + + // The unpack type + unpackType = v->cmd & 0xf; + + _mm_prefetch((char*)data + 128, _MM_HINT_NTA); + + ft = &VIFfuncTable[ unpackType ]; + func = vif->usn ? ft->funcU : ft->funcS; + + size <<= 2; + +#ifdef _DEBUG + memsize = size; +#endif + + +#ifdef VIFUNPACKDEBUG + + if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + //Sanity Check (memory overflow) + DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 
0x4000 : 0x1000); + + } +#endif + + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write + { + +#ifdef _DEBUG + static int s_count = 0; +#endif + + + if (size >= ft->gsize) { const UNPACKPARTFUNCTYPESSE* pfn; int writemask; @@ -554,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle; + if(vifRegs->mode == 2) + { + //Update the reg rows for non SSE + vifRegs->r0 = vifRow[0]; + vifRegs->r1 = vifRow[1]; + vifRegs->r2 = vifRow[2]; + vifRegs->r3 = vifRow[3]; + } + + // if size is left over, update the src,dst pointers if (writemask > 0) { @@ -561,107 +626,65 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma cdata += left * ft->gsize; dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16); vifRegs->num -= left; - _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + size = writemask; + + if (size >= ft->dsize && vifRegs->num > 0) + { + //VIF_LOG("warning, end with size = %d", size); + + /* unpack one qword */ + vif->tag.addr += (size / ft->dsize) * 4; + func(dest, (u32*)cdata, size / ft->dsize); + size = 0; + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } + VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); + } } else { vifRegs->num -= size / ft->gsize; - if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + size = 0; } - size = writemask; - - _vifRegs->r0 = _vifRow[0]; - _vifRegs->r1 = _vifRow[1]; - _vifRegs->r2 = _vifRow[2]; - _vifRegs->r3 = _vifRow[3]; - } - else - { - - if ((unpackType == 0xC) && 
(vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available - { - // v4-32 - if ((vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) - { - vifRegs->num -= size >> 4; - memcpy_fast((u8*)dest, cdata, size); - size = 0; - return; - } - } - - incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; - - while ((size >= ft->gsize) && (vifRegs->num > 0)) - { - func(dest, (u32*)cdata, ft->qsize); - cdata += ft->gsize; - size -= ft->gsize; - - vifRegs->num--; - ++vif->cl; - if (vif->cl == vifRegs->cycle.wl) - { - dest += incdest; - vif->cl = 0; - } - else - { - dest += 4; - } - } - - // have to update - _vifRow[0] = _vifRegs->r0; - _vifRow[1] = _vifRegs->r1; - _vifRow[2] = _vifRegs->r2; - _vifRow[3] = _vifRegs->r3; - } - - // used for debugging vif -// { -// int i, j, k; -// u32* curdest = olddest; -// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w"); -// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr); -// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle); -// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]); -// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3); -// -// for(i = 0; i < memsize; ) { -// for(k = 0; k < vifRegs->cycle.wl; ++k) { -// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) { -// fprintf(ftemp, "%x ", curdest[4*k+j]); -// } -// } -// -// fprintf(ftemp, "\n"); -// curdest += 4*vifRegs->cycle.cl; -// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl; -// } -// fclose(ftemp); -// } -// s_count++; - - if (size >= ft->dsize && vifRegs->num > 0) + } + else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have { //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ + vif->tag.addr += (size / ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + 
vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); } - } else /* filling write */ { - VIF_LOG("VIFunpack - filling write"); + if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) + DevCon::Notice("Filling write warning! Size < packet size and CL != 0"); + VIFUNPACK_LOG("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType); - while (size >= ft->gsize || vifRegs->num > 0) + while (vifRegs->num > 0) { if (vif->cl == vifRegs->cycle.wl) { @@ -679,6 +702,11 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { vif->cl = 0; } + if(size < ft->gsize) + { + VIF_LOG("Out of Filling write data"); + break; + } } else { @@ -786,17 +814,21 @@ static __forceinline void vif0UNPACK(u32 *data) len = ((((32 >> vl) * (vn + 1)) * n) + 31) >> 5; } - vif0.wl = 0; vif0.cl = 0; vif0.tag.cmd = vif0.cmd; vif0.tag.addr &= 0xfff; vif0.tag.size = len; vif0Regs->offset = 0; + + vifRegs = (VIFregisters*)vif0Regs; + vifMaskRegs = g_vif0Masks; + vif = &vif0; + vifRow = g_vifRow0; } -static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) +static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size) { - /* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size); + /* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size); { FILE *f = fopen("vu1.raw", "wb"); fwrite(data, 1, size*4, f); @@ -900,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG { if (vif0.vifpacketsize < vif0.tag.size) { - _vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); + vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); vif0.tag.addr += vif0.vifpacketsize << 2; vif0.tag.size -= vif0.vifpacketsize; return vif0.vifpacketsize; @@ -909,7 +941,7 @@ static int __fastcall 
Vif0TransMPG(u32 *data) // MPG { int ret; - _vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); + vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); ret = vif0.tag.size; vif0.tag.size = 0; vif0.cmd = 0; @@ -924,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK { /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); + + ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum); + vif0.tag.size -= vif0.vifpacketsize; FreezeXMMRegs(0); return vif0.vifpacketsize; @@ -931,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK else { /* we got all the data, transfer it fully */ - int ret; + int ret = vif0.tag.size; - VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); - ret = vif0.tag.size; - vif0.tag.size = 0; - vif0.cmd = 0; - FreezeXMMRegs(0); - return ret; + //Align data after a split transfer first + if(vif0Regs->offset != 0 || vif0.cl != 0) + { + vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + data += ret - vif0.tag.size; + if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + vif0.tag.size = 0; + vif0.cmd = 0; + FreezeXMMRegs(0); + return ret; + } + else + { + VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + vif0.tag.size = 0; + vif0.cmd = 0; + FreezeXMMRegs(0); + return ret; + } } } @@ -1516,15 +1564,20 @@ static __forceinline void vif1UNPACK(u32 *data) else vif1.tag.addr = vif1Regs->code & 0x3ff; + vif1Regs->offset = 0; vif1.cl = 0; vif1.tag.addr <<= 4; - vif1.tag.cmd = vif1.cmd; + + vifRegs = (VIFregisters*)vif1Regs; + vifMaskRegs = g_vif1Masks; + vif = &vif1; + vifRow = g_vifRow1; } -static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) +static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size) { - /* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size); + /* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, 
size); { FILE *f = fopen("vu1.raw", "wb"); fwrite(data, 1, size*4, f); @@ -1626,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data) { if (vif1.vifpacketsize < vif1.tag.size) { - _vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); + vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); vif1.tag.addr += vif1.vifpacketsize << 2; vif1.tag.size -= vif1.vifpacketsize; return vif1.vifpacketsize; @@ -1634,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data) else { int ret; - _vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); + vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); ret = vif1.tag.size; vif1.tag.size = 0; vif1.cmd = 0; @@ -1735,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data) /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); + + ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum); vif1.tag.size -= vif1.vifpacketsize; FreezeXMMRegs(0); return vif1.vifpacketsize; } else { - int ret; - /* we got all the data, transfer it fully */ - VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); - ret = vif1.tag.size; - vif1.tag.size = 0; - vif1.cmd = 0; - FreezeXMMRegs(0); - return ret; + int ret = vif1.tag.size; + + if(vif1Regs->offset != 0 || vif1.cl != 0) + { + vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + data += ret - vif1.tag.size; + if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + vif1.tag.size = 0; + vif1.cmd = 0; + FreezeXMMRegs(0); + return ret; + } + else + { + /* we got all the data, transfer it fully */ + VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + vif1.tag.size = 0; + vif1.cmd = 0; + FreezeXMMRegs(0); + return ret; + } } } diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h index a7fed1c30b..6af78214e7 100644 --- a/pcsx2/VifDma.h +++ b/pcsx2/VifDma.h @@ -32,7 +32,7 @@ struct vifStruct { int cmd; int irq; int cl; - int wl; + int qwcalign; u8 usn; // The next three 
should be boolean, and will be next time I break savestate compatability. --arcum42 diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index a11a52c051..ba2df10aca 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -947,7 +947,6 @@ + + @@ -2912,149 +2915,36 @@ > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - ExceptionRecord; @@ -50,7 +52,8 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps ) // get bad virtual address uptr addr=ExceptionRecord.ExceptionInformation[1]; - + u8* pcode=(u8*)ExceptionRecord.ExceptionAddress; + //this is a *hackfix* for a bug on x64 windows kernels.They do not give correct address //if the error is a missaligned access (they return 0) if (addr==0) @@ -60,16 +63,17 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps ) } u32 offset = addr-(uptr)psM; - if (addr&0x80000000) + if (addr&0x80000000 && ((pcode-recMem)<(16*1024*1024)) ) { uptr _vtlb_HandleRewrite(u32 info,u8* ra); - u8* pcode=(u8*)ExceptionRecord.ExceptionAddress; - u32 patch_point=1; + s32 patch_point=1; //01 C1 while(pcode[-patch_point]!=0x81 || pcode[-patch_point-1]!=0xC1 || pcode[-patch_point-2]!=0x01) { patch_point++; + if (patch_point>0x100) + return EXCEPTION_CONTINUE_SEARCH; } assert(pcode[-patch_point]==0x81); pcode[-patch_point]=0xF;//js32, 0x81 is add32 @@ -88,7 +92,7 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps ) return EXCEPTION_CONTINUE_EXECUTION; } else - { + { if (offset>=Ps2MemSize::Base) return EXCEPTION_CONTINUE_SEARCH; diff --git a/pcsx2/x86/BaseblockEx.cpp b/pcsx2/x86/BaseblockEx.cpp index 22a6765e60..bb933f301d 100644 --- a/pcsx2/x86/BaseblockEx.cpp +++ b/pcsx2/x86/BaseblockEx.cpp @@ -70,10 +70,27 @@ int BaseBlocks::LastIndex(u32 startpc) const return imin; } -BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) const +BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) { - // 
TODO - return 0; + if (0 == blocks.size()) + return 0; + + int imin = 0, imax = blocks.size() - 1, imid; + + while(imin != imax) { + imid = (imin+imax+1)>>1; + + if (blocks[imid].fnptr > ip) + imax = imid - 1; + else + imin = imid; + } + + if (ip < blocks[imin].fnptr || + ip >= blocks[imin].fnptr + blocks[imin].x86size) + return 0; + + return &blocks[imin]; } void BaseBlocks::Link(u32 pc, uptr jumpptr) diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 6ebe592d5e..6ef0261451 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -18,14 +18,9 @@ #pragma once -#include "PrecompiledHeader.h" -#include -#include +#include // used by BaseBlockEx #include -// used to keep block information -#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot - // Every potential jump point in the PS2's addressable memory has a BASEBLOCK // associated with it. So that means a BASEBLOCK for every 4 bytes of PS2 // addressable memory. Yay! @@ -73,7 +68,7 @@ public: BASEBLOCKEX* New(u32 startpc, uptr fnptr); int LastIndex (u32 startpc) const; - BASEBLOCKEX* GetByX86(uptr ip) const; + BASEBLOCKEX* GetByX86(uptr ip); inline int Index (u32 startpc) const { @@ -119,7 +114,6 @@ public: } }; -#define GET_BLOCKTYPE(b) ((b)->Type) #define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4))) static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000], diff --git a/pcsx2/x86/aVif.S b/pcsx2/x86/aVif.S index 05a2e9248f..e4b64685f6 100644 --- a/pcsx2/x86/aVif.S +++ b/pcsx2/x86/aVif.S @@ -18,9 +18,9 @@ */ .intel_syntax noprefix -.extern _vifRegs -.extern _vifMaskRegs -.extern _vifRow +.extern vifRegs +.extern vifMaskRegs +.extern vifRow #define VIF_ESP esp #define VIF_SRC esi @@ -108,7 +108,7 @@ // setting up masks #define UNPACK_Setup_Mask_SSE(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr 
[VIF_TMPADDR + 64*(CL) + 32]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \ @@ -118,7 +118,7 @@ #define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL) #define UNPACK_Start_Setup_Mask_SSE_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ pand XMM_ROWMASK, XMM_ROW; \ @@ -129,12 +129,12 @@ #define UNPACK_Setup_Mask_SSE_0_1(CL) #define UNPACK_Setup_Mask_SSE_1_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ // ignore CL, since vif.cycle.wl == 1 #define UNPACK_Setup_Mask_SSE_2_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ @@ -1312,9 +1312,9 @@ #pragma warning(disable:4731) #define SAVE_ROW_REG_BASE \ - mov VIF_TMPADDR, _vifRow; \ + mov VIF_TMPADDR, vifRow; \ movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \ - mov VIF_TMPADDR, _vifRegs; \ + mov VIF_TMPADDR, vifRegs; \ movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \ psrldq XMM_ROW, 4; \ movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \ @@ -1349,7 +1349,7 @@ .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \ UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \ INIT_ARGS(); \ - mov VIF_TMPADDR, _vifRegs; \ + mov VIF_TMPADDR, vifRegs; \ movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \ movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \ sub VIF_INC, VIF_SAVEEBX; \ diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 6ab0be3488..0ce3a2c5b5 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "iR5900.h" 
#include "Vif.h" #include "VU.h" diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 4fe4632b0a..e031a7a6c6 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -1956,14 +1956,14 @@ CPU_SSE_XMMCACHE_END // Both Macros are 16 bytes so we can use a shift instead of a Mul instruction #define QFSRVhelper0() { \ ajmp[0] = JMP32(0); \ - x86Ptr[0] += 11; \ + x86Ptr += 11; \ } #define QFSRVhelper(shift1, shift2) { \ SSE2_PSRLDQ_I8_to_XMM(EEREC_D, shift1); \ SSE2_PSLLDQ_I8_to_XMM(t0reg, shift2); \ ajmp[shift1] = JMP32(0); \ - x86Ptr[0] += 1; \ + x86Ptr += 1; \ } void recQFSRV() @@ -1982,8 +1982,8 @@ void recQFSRV() MOV32MtoR(EAX, (uptr)&cpuRegs.sa); SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) - AND32I8toR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) - ADD32ItoEAX((uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes + AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) + ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) // Case 0: @@ -2676,9 +2676,6 @@ CPU_SSE_XMMCACHE_END recCall( Interp::PHMADH, _Rd_ ); } -//////////////////////////////////////////////////// -//upper word of each doubleword in LO and HI is undocumented/undefined -//contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPMSUBH() { CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) @@ -2740,12 +2737,8 @@ CPU_SSE_XMMCACHE_END } //////////////////////////////////////////////////// - -// rs = ... a1 a0 -// rt = ... b1 b0 -// rd = ... a1*b1 - a0*b0 -// hi = ... -// lo = ... 
(undefined by doc)NOT(a1*b1), a1*b1 - a0*b0 +//upper word of each doubleword in LO and HI is undocumented/undefined +//it contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPHMSBH() { CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index a1b1ec756d..38e2021609 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -24,6 +24,8 @@ #include "PrecompiledHeader.h" #include "iR3000A.h" +#include "BaseblockEx.h" + #include #ifndef _WIN32 @@ -171,7 +173,7 @@ static void iIopDumpBlock( int startpc, u8 * ptr ) #ifdef __LINUX__ // dump the asm f = fopen( "mydump1", "wb" ); - fwrite( ptr, 1, (uptr)x86Ptr[0] - (uptr)ptr, f ); + fwrite( ptr, 1, (uptr)x86Ptr - (uptr)ptr, f ); fclose( f ); sprintf( command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 | cat %s - > tempdump", filename ); system( command ); @@ -316,7 +318,7 @@ void _psxMoveGPRtoM(u32 to, int fromgpr) void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr) { if( PSX_IS_CONST1(fromgpr) ) - MOV32ItoRmOffset( to, g_psxConstRegs[fromgpr], 0 ); + MOV32ItoRm( to, g_psxConstRegs[fromgpr] ); else { // check x86 MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[ fromgpr ] ); @@ -647,7 +649,7 @@ static void recExecute() //for (;;) R3000AExecute(); } -static s32 recExecuteBlock( s32 eeCycles ) +static __forceinline s32 recExecuteBlock( s32 eeCycles ) { psxBreak = 0; psxCycleEE = eeCycles; @@ -741,7 +743,7 @@ static __forceinline u32 psxRecClearMem(u32 pc) return upperextent - pc; } -static void recClear(u32 Addr, u32 Size) +static __forceinline void recClearIOP(u32 Addr, u32 Size) { u32 pc = Addr; while (pc < Addr + Size*4) @@ -772,7 +774,7 @@ void psxSetBranchReg(u32 reg) _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( 
(uptr)x86Ptr + 5 )); } void psxSetBranchImm( u32 imm ) @@ -796,7 +798,7 @@ void psxSetBranchImm( u32 imm ) // So for now these are new settings that work. // (rama) -static u32 psxScaleBlockCycles() +static __forceinline u32 psxScaleBlockCycles() { return s_psxBlockCycles * (CHECK_IOP_CYCLERATE ? 2 : 1); } @@ -828,7 +830,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) if( newpc != 0xffffffff ) { CMP32ItoM((uptr)&psxRegs.pc, newpc); - JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 6 )); + JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 6 )); } // Skip branch jump target here: @@ -864,7 +866,7 @@ void rpsxSYSCALL() ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); // jump target for skipping blockCycle updates x86SetJ8(j8Ptr[0]); @@ -884,7 +886,7 @@ void rpsxBREAK() j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //if (!psxbranch) psxbranch = 2; @@ -1004,7 +1006,7 @@ void iopRecRecompile(u32 startpc) x86SetPtr( recPtr ); x86Align(16); - recPtr = x86Ptr[_EmitterId_]; + recPtr = x86Ptr; s_pCurBlock = PSX_GETBLOCK(startpc); @@ -1025,7 +1027,7 @@ void iopRecRecompile(u32 startpc) psxbranch = 0; - s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] ); + s_pCurBlock->SetFnptr( (uptr)x86Ptr ); s_psxBlockCycles = 0; // reset recomp state variables @@ -1160,7 +1162,7 @@ StartRecomp: iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); } else { if( psxbranch ) assert( !willbranch3 ); @@ -1180,12 +1182,12 @@ StartRecomp: } } - assert( x86Ptr[0] < recMem+RECMEM_SIZE ); + assert( x86Ptr < 
recMem+RECMEM_SIZE ); - assert(x86Ptr[_EmitterId_] - recPtr < 0x10000); - s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr; + assert(x86Ptr - recPtr < 0x10000); + s_pCurBlockEx->x86size = x86Ptr - recPtr; - recPtr = x86Ptr[0]; + recPtr = x86Ptr; assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); @@ -1198,7 +1200,7 @@ R3000Acpu psxRec = { recResetIOP, recExecute, recExecuteBlock, - recClear, + recClearIOP, recShutdown }; diff --git a/pcsx2/x86/iR3000A.h b/pcsx2/x86/iR3000A.h index ebf78ec3cb..220eb33ff0 100644 --- a/pcsx2/x86/iR3000A.h +++ b/pcsx2/x86/iR3000A.h @@ -18,12 +18,10 @@ #ifndef _R3000A_SUPERREC_ #define _R3000A_SUPERREC_ -#define _EmitterId_ EmitterId_R3000a #include "ix86/ix86.h" #include "R3000A.h" #include "iCore.h" -#include "BaseblockEx.h" // Cycle penalties for particularly slow instructions. static const int psxInstCycles_Mult = 7; diff --git a/pcsx2/x86/iR3000Atables.cpp b/pcsx2/x86/iR3000Atables.cpp index 1b1f4486e6..fddd6f9690 100644 --- a/pcsx2/x86/iR3000Atables.cpp +++ b/pcsx2/x86/iR3000Atables.cpp @@ -1258,7 +1258,7 @@ void rpsxJALR() static void* s_pbranchjmp; static u32 s_do32 = 0; -#define JUMPVALID(pjmp) (( x86Ptr[0] - (u8*)pjmp ) <= 0x80) +#define JUMPVALID(pjmp) (( x86Ptr - (u8*)pjmp ) <= 0x80) void rpsxSetBranchEQ(int info, int process) { @@ -1305,7 +1305,7 @@ void rpsxBEQ_process(int info, int process) else { _psxFlushAllUnused(); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; s_do32 = 0; psxSaveBranchState(); @@ -1318,7 +1318,7 @@ void rpsxBEQ_process(int info, int process) x86SetJ8A( (u8*)s_pbranchjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); s_do32 = 1; psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); @@ -1369,7 +1369,7 @@ void rpsxBNE_process(int info, int process) } _psxFlushAllUnused(); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; s_do32 = 0; rpsxSetBranchEQ(info, process); @@ -1381,7 +1381,7 @@ void rpsxBNE_process(int info, int process) x86SetJ8A( (u8*)s_pbranchjmp ); } 
else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); s_do32 = 1; psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); @@ -1423,7 +1423,7 @@ void rpsxBLTZ() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JL8(0); psxSaveBranchState(); @@ -1435,7 +1435,7 @@ void rpsxBLTZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1470,7 +1470,7 @@ void rpsxBGEZ() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JGE8(0); psxSaveBranchState(); @@ -1482,7 +1482,7 @@ void rpsxBGEZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1524,7 +1524,7 @@ void rpsxBLTZAL() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JL8(0); psxSaveBranchState(); @@ -1538,7 +1538,7 @@ void rpsxBLTZAL() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1577,7 +1577,7 @@ void rpsxBGEZAL() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JGE8(0); MOV32ItoM((uptr)&psxRegs.GPR.r[31], psxpc+4); @@ -1591,7 +1591,7 @@ void rpsxBGEZAL() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1631,7 +1631,7 @@ void rpsxBLEZ() _clearNeededX86regs(); CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JLE8(0); psxSaveBranchState(); @@ -1642,7 +1642,7 @@ void rpsxBLEZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ 
-1679,7 +1679,7 @@ void rpsxBGTZ() _clearNeededX86regs(); CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JG8(0); psxSaveBranchState(); @@ -1690,7 +1690,7 @@ void rpsxBGTZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 15fc5ef33b..5f0644c073 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -19,13 +19,11 @@ #ifndef __IR5900_H__ #define __IR5900_H__ -#define _EmitterId_ EmitterId_R5900 #include "ix86/ix86.h" #include "ix86/ix86_sse_helpers.h" #include "R5900.h" #include "VU.h" #include "iCore.h" -#include "BaseblockEx.h" // needed for recClear and stuff // Yay! These work now! (air) ... almost (air) #define ARITHMETICIMM_RECOMPILE diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 4b70381294..55af7a5c54 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -23,11 +23,12 @@ #include "VUmicro.h" #include "iVUzerorec.h" +#ifndef PCSX2_MICROVU_ namespace VU0micro { - void recAlloc() - { - SuperVUAlloc(0); + void recAlloc() + { + SuperVUAlloc(0); } void __fastcall recClear(u32 Addr, u32 Size) @@ -62,6 +63,34 @@ namespace VU0micro FreezeXMMRegs(0); } } +#else + +extern void initVUrec(VURegs* vuRegs, const int vuIndex); +extern void closeVUrec(const int vuIndex); +extern void resetVUrec(const int vuIndex); +extern void clearVUrec(u32 addr, u32 size, const int vuIndex); +extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex); + +namespace VU0micro +{ + void recAlloc() { initVUrec(&VU0, 0); } + void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 0); } + void recShutdown() { closeVUrec(0); } + static void recReset() { resetVUrec(0); x86FpuState = FPU_STATE; } + static void recStep() {} + static void recExecuteBlock() + { + if((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; + + 
FreezeXMMRegs(1); + FreezeMMXRegs(1); + runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0); + FreezeXMMRegs(0); + FreezeMMXRegs(0); + } + +} +#endif using namespace VU0micro; diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 2c4cc00bb4..7debdcfa8e 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -29,7 +29,7 @@ #ifdef _DEBUG extern u32 vudump; #endif - +#ifndef PCSX2_MICROVU_ namespace VU1micro { void recAlloc() @@ -121,6 +121,34 @@ namespace VU1micro FreezeXMMRegs(0); } } +#else + +extern void initVUrec(VURegs* vuRegs, const int vuIndex); +extern void closeVUrec(const int vuIndex); +extern void resetVUrec(const int vuIndex); +extern void clearVUrec(u32 addr, u32 size, const int vuIndex); +extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex); + +namespace VU1micro +{ + void recAlloc() { initVUrec(&VU1, 1); } + void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 1); } + void recShutdown() { closeVUrec(1); } + static void recReset() { resetVUrec(1); x86FpuState = FPU_STATE; } + static void recStep() {} + static void recExecuteBlock() { + + if((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) return; + assert( (VU1.VI[REG_TPC].UL&7) == 0 ); + + FreezeXMMRegs(1); + FreezeMMXRegs(0); + runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1); + FreezeXMMRegs(0); + FreezeMMXRegs(0); + } +} +#endif using namespace VU1micro; diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index 2422e5ad10..9335151798 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -280,6 +280,7 @@ void _recvuIALUTestStall(VURegs * VU, int reg) { VU->ialu[i].enable = 0; vucycle+= cycle; + _recvuTestPipes(VU, true); } void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) { @@ -387,7 +388,7 @@ void _recvuFlushFDIV(VURegs * VU) { if (VU->fdiv.enable == 0) return; - cycle = VU->fdiv.Cycle - (vucycle - VU->fdiv.sCycle); + cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 
or 12) // Console::WriteLn("waiting FDIV pipe %d", params cycle); VU->fdiv.enable = 0; vucycle+= cycle; diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 58156d0926..9f9d816cf7 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -354,7 +354,7 @@ void recVUMI_IADD( VURegs *VU, int info ) if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg); else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg); - else LEA16RRtoR(fdreg, fsreg, ftreg); + else LEA32RRtoR(fdreg, fsreg, ftreg); MOVZX32R16toR(fdreg, fdreg); // neeed since don't know if fdreg's upper bits are 0 } } @@ -609,31 +609,31 @@ void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info) if( x86reg >= 0 ) { switch(_X_Y_Z_W) { case 3: // ZW - SSE_MOVHPS_RmOffset_to_XMM(EEREC_T, x86reg, offset+8); + SSE_MOVHPS_Rm_to_XMM(EEREC_T, x86reg, offset+8); break; case 6: // YZ - SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0x9c); + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0x9c); SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); break; case 8: // X - SSE_MOVSS_RmOffset_to_XMM(EEREC_TEMP, x86reg, offset); + SSE_MOVSS_Rm_to_XMM(EEREC_TEMP, x86reg, offset); SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); break; case 9: // XW - SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0xc9); + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0xc9); SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); break; case 12: // XY - SSE_MOVLPS_RmOffset_to_XMM(EEREC_T, x86reg, offset); + SSE_MOVLPS_Rm_to_XMM(EEREC_T, x86reg, offset); break; case 15: - if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_T, x86reg, offset); - else SSE_MOVUPSRmtoROffset(EEREC_T, x86reg, offset); + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_T, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_T, x86reg, offset); break; default: - if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset); - else SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset); + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_TEMP, x86reg, offset); + else 
SSE_MOVUPSRmtoR(EEREC_TEMP, x86reg, offset); VU_MERGE_REGS(EEREC_T, EEREC_TEMP); break; @@ -795,15 +795,15 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) if ( _Fs_ == 0 ) { if ( _XYZW_SS ) { u32 c = _W ? 0x3f800000 : 0; - if ( x86reg >= 0 ) MOV32ItoRmOffset(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); + if ( x86reg >= 0 ) MOV32ItoRm(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); } else { if ( x86reg >= 0 ) { - if ( _X ) MOV32ItoRmOffset(x86reg, 0x00000000, offset); - if ( _Y ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+4); - if ( _Z ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+8); - if ( _W ) MOV32ItoRmOffset(x86reg, 0x3f800000, offset+12); + if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset); + if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4); + if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8); + if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12); } else { if ( _X ) MOV32ItoM(offset, 0x00000000); @@ -818,29 +818,29 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) switch ( _X_Y_Z_W ) { case 1: // W SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x27); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); break; case 2: // Z SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); break; case 3: // ZW - if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8); + if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); break; case 4: // Y SSE2_PSHUFLW_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4e); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); + if ( x86reg >= 0 ) 
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_TEMP); break; case 5: // YW SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset+4); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S); @@ -850,14 +850,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 6: // YZ SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xc9); - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_TEMP); break; case 7: // YZW SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x93); //ZYXW if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVHPS_XMM_to_M64(offset+4, EEREC_TEMP); @@ -865,26 +865,26 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) } break; case 8: // X - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); break; case 9: // XW SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, 
EEREC_TEMP, offset+12); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); break; case 10: //XZ SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); } else { SSE_MOVSS_XMM_to_M32(offset, EEREC_S); @@ -893,8 +893,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 11: //XZW if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); } else { SSE_MOVSS_XMM_to_M32(offset, EEREC_S); @@ -902,14 +902,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) } break; case 12: // XY - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); break; case 13: // XYW SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4b); //YXZW if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+0); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVHPS_XMM_to_M64(offset, EEREC_TEMP); @@ -919,8 +919,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) case 14: // XYZ SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); } else { SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); @@ 
-929,11 +929,11 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 15: // XYZW if ( VU == &VU1 ) { - if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_S, offset+0); + if( x86reg >= 0 ) SSE_MOVAPSRtoRm(x86reg, EEREC_S, offset+0); else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); } else { - if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_S, offset+0); + if( x86reg >= 0 ) SSE_MOVUPSRtoRm(x86reg, EEREC_S, offset+0); else { if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S); else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); @@ -1018,7 +1018,7 @@ void recVUMI_ILW(VURegs *VU, int info) } else { int fsreg = ALLOCVI(_Fs_, MODE_READ); - MOV32RmtoROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off); + MOV32RmtoR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off); } } //------------------------------------------------------------------ @@ -1051,10 +1051,10 @@ void recVUMI_ISW( VURegs *VU, int info ) x86reg = recVUTransformAddr(fsreg, VU, _Fs_, imm); - if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12); + if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12); } } //------------------------------------------------------------------ @@ -1082,7 +1082,7 @@ void recVUMI_ILWR( VURegs *VU, int info ) } else { int fsreg = ALLOCVI(_Fs_, MODE_READ); - MOVZX32Rm16toROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off); + MOVZX32Rm16toR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off); } } //------------------------------------------------------------------ @@ -1109,10 +1109,10 @@ void recVUMI_ISWR( VURegs *VU, int info ) int fsreg = 
ALLOCVI(_Fs_, MODE_READ); x86reg = recVUTransformAddr(fsreg, VU, _Fs_, 0); - if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12); + if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12); } } //------------------------------------------------------------------ diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 4dea960b87..0f454e638e 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -58,7 +58,7 @@ extern void iDumpVU1Registers(); #define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on... #ifndef _DEBUG -#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) +//#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) (nothing noticable speed-wise, causes SPS in Ratchet and clank (Nneeve) ) #endif #define SUPERVU_CHECKCONDITION 0 // has to be 0!! 
@@ -833,7 +833,7 @@ static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) SuperVURecompile(); - s_recVUPtr = x86Ptr[0]; + s_recVUPtr = x86Ptr; // set the function's range VuFunctionHeader::RANGE r; @@ -1889,7 +1889,7 @@ void VuBaseBlock::AssignVFRegs() if( i == XMMREGS ) return; // nothing changed } - u8* oldX86 = x86Ptr[0]; + u8* oldX86 = x86Ptr; FORIT(itinst, insts) { @@ -2060,9 +2060,9 @@ void VuBaseBlock::AssignVFRegs() _freeXMMreg(free1); _freeXMMreg(free2); } - else if( regs->VIwrite & (1<VIwrite & (1<VIwrite & (1<VIwrite & (1<VIread & (1<vifRegs->stat, ~0x4 ); MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); } // only other case is when there are two branches else assert( (*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH ); @@ -2606,11 +2607,11 @@ void SuperVUTestVU0Condition(u32 incstack) ADD32ItoR(ESP, incstack); //CALLFunc((u32)timeout); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); x86SetJ8(ptr); } - else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 6 ) ); + else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 6 ) ); } void VuBaseBlock::Recompile() @@ -2618,7 +2619,7 @@ void VuBaseBlock::Recompile() if( type & BLOCKTYPE_ANALYZED ) return; x86Align(16); - pcode = x86Ptr[0]; + pcode = x86Ptr; #ifdef _DEBUG MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc); @@ -2726,7 +2727,7 @@ void VuBaseBlock::Recompile() AND32ItoM( (uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu?~0x100:~0x001 ); // E flag AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 ); if( !branch ) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); } else { @@ -2868,7 +2869,7 @@ void VuBaseBlock::Recompile() } } - pendcode = x86Ptr[0]; + pendcode = x86Ptr; type |= 
BLOCKTYPE_ANALYZED; LISTBLOCKS::iterator itchild; @@ -3569,7 +3570,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1; if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) { j8Ptr[1] = JMP8(0); @@ -3578,7 +3579,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1; x86SetJ8( j8Ptr[ 1 ] ); } @@ -3815,7 +3816,7 @@ void recVUMI_B( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } @@ -3841,7 +3842,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } diff --git a/pcsx2/x86/ix86-32/aVif_proc-32.asm b/pcsx2/x86/ix86-32/aVif_proc-32.asm index 12c8b969b4..62fd377795 100644 --- a/pcsx2/x86/ix86-32/aVif_proc-32.asm +++ b/pcsx2/x86/ix86-32/aVif_proc-32.asm @@ -5,9 +5,9 @@ .xmm -extern _vifRegs:ptr -extern _vifMaskRegs:ptr -extern _vifRow:ptr +extern vifRegs:ptr +extern vifMaskRegs:ptr +extern vifRow:ptr extern s_TempDecompress:ptr @@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 
macro r0 UNPACK_Setup_Mask_SSE macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm5, [eax + 64*(CL) + 32] movdqa xmm3, [eax + 64*(CL)] @@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL endm UNPACK_Start_Setup_Mask_SSE_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm5, [eax + 64*(CL) + 32] pand xmm4, xmm6 @@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL UNPACK_Setup_Mask_SSE_0_1 macro CL endm UNPACK_Setup_Mask_SSE_1_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm3, [eax + 64*(0)] endm UNPACK_Setup_Mask_SSE_2_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(0) + 16] movdqa xmm5, [eax + 64*(0) + 32] movdqa xmm3, [eax + 64*(0)] @@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType SAVE_ROW_REG_BASE macro - mov eax, [_vifRow] + mov eax, [vifRow] movdqa [eax], xmm6 - mov eax, [_vifRegs] + mov eax, [vifRegs] movss dword ptr [eax+0100h], xmm6 psrldq xmm6, 4 movss dword ptr [eax+0110h], xmm6 @@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE push ebx INIT_ARGS - mov eax, [_vifRegs] + mov eax, [vifRegs] movzx ecx, byte ptr [eax + 040h] movzx ebx, byte ptr [eax + 041h] sub ecx, ebx diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 1021644be3..9904f342a1 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "iR5900.h" #include "Vif.h" #include "VU.h" @@ -161,7 +161,7 @@ void _flushConstRegs() zero_cnt++; } - rewindPtr = x86Ptr[_EmitterId_]; + rewindPtr = x86Ptr; for (i = 1, j = 0; i < 32; j++ && ++i, j %= 2) { if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1< %s", filename ); @@ -367,7 +373,7 @@ void _eeMoveGPRtoM(u32 to, int fromgpr) void 
_eeMoveGPRtoRm(x86IntRegType to, int fromgpr) { if( GPR_IS_CONST1(fromgpr) ) - MOV32ItoRmOffset( to, g_cpuConstRegs[fromgpr].UL[0], 0 ); + MOV32ItoRm( to, g_cpuConstRegs[fromgpr].UL[0] ); else { int mmreg; @@ -380,7 +386,7 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) } else { MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] ); - MOV32RtoRm(to, EAX ); + MOV32RtoRm( to, EAX ); } } } @@ -579,8 +585,8 @@ void recResetEE( void ) // so a fix will have to wait until later. -_- (air) //x86SetPtr(recMem+REC_CACHEMEM); - //dyna_block_discard_recmem=(u8*)x86Ptr[0]; - //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr[0] + 5 )); + //dyna_block_discard_recmem=(u8*)x86Ptr; + //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 )); x86SetPtr(recMem); @@ -677,7 +683,7 @@ static void __naked DispatcherReg() } } -__forceinline void recExecute() +void recExecute() { // Optimization note : Compared pushad against manually pushing the regs one-by-one. // Manually pushing is faster, especially on Core2's and such. 
:) @@ -791,7 +797,7 @@ void recSYSCALL( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); - JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //branch = 2; } @@ -1148,7 +1154,7 @@ static void iBranchTest(u32 newpc, bool noDispatch) if (!noDispatch) { if (newpc == 0xffffffff) - JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 )); + JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); else iBranch(newpc, 1); } @@ -1379,7 +1385,7 @@ void recRecompile( const u32 startpc ) x86SetPtr( recPtr ); x86Align(16); - recPtr = x86Ptr[_EmitterId_]; + recPtr = x86Ptr; s_pCurBlock = PC_GETBLOCK(startpc); @@ -1732,8 +1738,11 @@ StartRecomp: if (bit==31) { vtlb_alloc_bits[writen_start]&=~mask; - TEST32ItoM((uptr)&vtlb_alloc_bits[writen_start],mask); - JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 )); + if ((u8)mask==mask) + TEST8ItoM((uptr)&vtlb_alloc_bits[writen_start],mask); + else + TEST32ItoM((uptr)&vtlb_alloc_bits[writen_start],mask); + JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 )); SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4); mask=0; } @@ -1755,8 +1764,11 @@ StartRecomp: if (mask) { vtlb_alloc_bits[writen_start]&=~mask; - TEST32ItoM((uptr)&vtlb_alloc_bits[writen_start],mask); - JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 )); + if ((u8)mask==mask) + TEST8ItoM((uptr)&vtlb_alloc_bits[writen_start],mask); + else + TEST32ItoM((uptr)&vtlb_alloc_bits[writen_start],mask); + JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 )); SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4); mask=0; } @@ -1768,14 +1780,14 @@ StartRecomp: { // was dyna_block_discard_recmem. See note in recResetEE for details. 
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc)); - JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 )); + JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 )); stg-=4; lpc+=4; } */ - DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params - startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4); + //DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params + // startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4); } } inpage_ptr+=pgsz; @@ -1855,14 +1867,14 @@ StartRecomp: } } - assert( x86Ptr[0] < recMem+REC_CACHEMEM ); + assert( x86Ptr < recMem+REC_CACHEMEM ); assert( recStackPtr < recStack+RECSTACK_SIZE ); assert( x86FpuState == 0 ); - assert(x86Ptr[_EmitterId_] - recPtr < 0x10000); - s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr; + assert(x86Ptr - recPtr < 0x10000); + s_pCurBlockEx->x86size = x86Ptr - recPtr; - recPtr = x86Ptr[0]; + recPtr = x86Ptr; assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 83925362c3..aaf93d1d47 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -1930,7 +1930,7 @@ void recLQC2( void ) dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0); if( _Ft_ ) { - u8* rawreadptr = x86Ptr[0]; + u8* rawreadptr = x86Ptr; if( mmreg >= 0 ) { SSEX_MOVDQARmtoROffset(mmreg, ECX, PS2MEM_BASE_+s_nAddMemOffset); @@ -1945,7 +1945,7 @@ void recLQC2( void ) // check if writing to VUs CMP32ItoR(ECX, 0x11000000); - JAE8(rawreadptr - (x86Ptr[0]+2)); + JAE8(rawreadptr - (x86Ptr+2)); PUSH32I( (int)&VU0.VF[_Ft_].UD[0] ); CALLFunc( (int)recMemRead128 ); @@ -1999,7 +1999,7 @@ void recSQC2( void ) mmregs = _eePrepareReg(_Rs_); dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0); - rawreadptr = x86Ptr[0]; + rawreadptr = x86Ptr; if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) { SSEX_MOVDQARtoRmOffset(ECX, mmreg, PS2MEM_BASE_+s_nAddMemOffset); @@ -2039,7 
+2039,7 @@ void recSQC2( void ) // check if writing to VUs CMP32ItoR(ECX, 0x11000000); - JAE8(rawreadptr - (x86Ptr[0]+2)); + JAE8(rawreadptr - (x86Ptr+2)); // some type of hardware write if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) { @@ -2101,7 +2101,7 @@ void recLoad64( u32 bits, bool sign ) if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ ); if( bits == 128 ) // force 16 byte alignment on 128 bit reads - AND32I8toR(ECX,0xF0); + AND32ItoR(ECX,~0x0F); // emitter automatically encodes this as an 8-bit sign-extended imm8 _eeOnLoadWrite(_Rt_); EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension @@ -2198,7 +2198,7 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false) if ( _Imm_ != 0 ) ADD32ItoR(ECX, _Imm_); if (sz==128) - AND32I8toR(ECX,0xF0); + AND32ItoR(ECX,~0x0F); vtlb_DynGenWrite(sz); } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 243acb01be..3bdc53a21f 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -23,6 +23,7 @@ #include "iCore.h" #include "iR5900.h" +#include "x86\ix86\ix86_internal.h" u8* code_pos=0; u8* code_start=0; @@ -63,7 +64,7 @@ void execuCode(bool set) SysPrintf("Leaking 2 megabytes of ram\n"); code_start=code_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE); code_sz+=2*1024*1024; - int i=0; + u32 i=0; while(i,imm, 6 bytes form. 
- write8<_EmitterId_>( 0x81 ); - ModRM<_EmitterId_>( 3, 0, EAX ); + write8( 0x81 ); + ModRM( 3, 0, EAX ); u8* rv=x86SetPtr(0); - write32<_EmitterId_>(0); + write32(0); return rv; } @@ -106,10 +107,10 @@ void IndirectPlaceholderB(u8* pl,bool read,u32 sz,bool sx) u8* old=x86SetPtr(pl); inf.skip=old-pl-4; //Add32 ,imm, 6 bytes form, patch the imm value - write32<_EmitterId_>( inf.full ); + write32( inf.full ); x86SetPtr(old); } -PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] ); +PCSX2_ALIGNED16( extern u64 g_globalXMMData[2*XMMREGS] ); void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0,bool half=false ) { int reg; @@ -130,24 +131,24 @@ void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAd if (srcAddr) SSE_MOVLPS_M64_to_XMM(reg,srcAddr); else - SSE_MOVLPS_RmOffset_to_XMM(reg,srcRm,0); + SSE_MOVLPS_Rm_to_XMM(reg,srcRm); if (dstAddr) SSE_MOVLPS_XMM_to_M64(dstAddr,reg); else - SSE_MOVLPS_XMM_to_RmOffset(destRm,reg,0); + SSE_MOVLPS_XMM_to_Rm(destRm,reg); } else { if (srcAddr) SSE2_MOVDQA_M128_to_XMM(reg,srcAddr); else - SSE2_MOVDQARmtoROffset(reg,srcRm,0); + SSE2_MOVDQARmtoR(reg,srcRm); if (dstAddr) SSE2_MOVDQA_XMM_to_M128(dstAddr,reg); else - SSE2_MOVDQARtoRmOffset(destRm,reg,0); + SSE2_MOVDQARtoRm(destRm,reg); } @@ -167,12 +168,12 @@ void MOV64_MMX( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstA if (srcAddr) MOVQMtoR(freereg,srcAddr); else - MOVQRmtoROffset(freereg,srcRm,0); + MOVQRmtoR(freereg,srcRm); if (dstAddr) MOVQRtoM(dstAddr,freereg); else - MOVQRtoRmOffset(destRm,freereg,0); + MOVQRtoRm(destRm,freereg); _freeMMXreg(freereg); } @@ -482,7 +483,6 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) bits_base-=(alloc_base>>4)/8;//in bytes BTS32MtoR(bits_base,ECX); -// BTS_wtf(asdasd,ECX); } static void _vtlb_DynGen_IndirectWrite( u32 bits ) @@ -614,9 +614,8 @@ uptr _vtlb_HandleRewrite(u32 info,u8* ra) u32 skip=GenIndirectMemOp(info); - JMP32(ra-x86Ptr[_EmitterId_]-5+skip); - + 
JMP32(ra-x86Ptr-5+skip); execuCode(false); return rv; -} \ No newline at end of file +} diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 7f76f134be..30e16e264a 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,4 +1,5 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h \ No newline at end of file +libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_group1.cpp \ +ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 11acfce6b5..f62cabf789 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -27,15 +27,465 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86.h" +#include "ix86_internal.h" -u8 *x86Ptr[EmitterId_Count]; - -u8 *j8Ptr[32]; -u32 *j32Ptr[32]; +__threadlocal u8 *x86Ptr; +__threadlocal u8 *j8Ptr[32]; +__threadlocal u32 *j32Ptr[32]; PCSX2_ALIGNED16(u32 p[4]); PCSX2_ALIGNED16(u32 p2[4]); PCSX2_ALIGNED16(float f[4]); XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; + +namespace x86Emitter { + +const x86IndexerType ptr; + +////////////////////////////////////////////////////////////////////////////////////////// +// +const x86Register32 x86Register32::Empty( -1 ); + +const x86Register32 eax( 0 ); +const x86Register32 ebx( 3 ); +const x86Register32 ecx( 1 ); +const x86Register32 edx( 2 ); +const x86Register32 esi( 6 ); +const x86Register32 edi( 7 ); +const x86Register32 ebp( 5 ); +const x86Register32 esp( 4 ); + +const x86Register16 ax( 0 ); +const x86Register16 bx( 3 ); +const x86Register16 cx( 1 ); +const x86Register16 dx( 2 ); +const x86Register16 si( 6 ); +const x86Register16 
di( 7 ); +const x86Register16 bp( 5 ); +const x86Register16 sp( 4 ); + +const x86Register8 al( 0 ); +const x86Register8 cl( 1 ); +const x86Register8 dl( 2 ); +const x86Register8 bl( 3 ); +const x86Register8 ah( 4 ); +const x86Register8 ch( 5 ); +const x86Register8 dh( 6 ); +const x86Register8 bh( 7 ); + +////////////////////////////////////////////////////////////////////////////////////////// +// x86Register Method Implementations +// +x86ModRm x86Register32::operator+( const x86Register32& right ) const +{ + return x86ModRm( *this, right ); +} + +x86ModRm x86Register32::operator+( const x86ModRm& right ) const +{ + return right + *this; +} + +x86ModRm x86Register32::operator+( s32 right ) const +{ + return x86ModRm( *this, right ); +} + +x86ModRm x86Register32::operator*( u32 right ) const +{ + return x86ModRm( Empty, *this, right ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// x86ModRm Method Implementations +// +x86ModRm& x86ModRm::Add( const x86IndexReg& src ) +{ + if( src == Index ) + { + Factor++; + } + else if( src == Base ) + { + // Compound the existing register reference into the Index/Scale pair. + Base = x86IndexReg::Empty; + + if( src == Index ) + Factor++; + else + { + jASSUME( Index.IsEmpty() ); // or die if we already have an index! + Index = src; + Factor = 2; + } + } + else if( Base.IsEmpty() ) + Base = src; + else if( Index.IsEmpty() ) + Index = src; + else + assert( false ); // oops, only 2 regs allowed per ModRm! + + return *this; +} + +x86ModRm& x86ModRm::Add( const x86ModRm& src ) +{ + Add( src.Base ); + Add( src.Displacement ); + + // If the factor is 1, we can just treat index like a base register also. + if( src.Factor == 1 ) + { + Add( src.Index ); + } + else if( Index.IsEmpty() ) + { + Index = src.Index; + Factor = 1; + } + else if( Index == src.Index ) + Factor++; + else + assert( false ); // oops, only 2 regs allowed! 
+ + return *this; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// ModSib Method Implementations +// + +// ------------------------------------------------------------------------ +// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. +// Necessary because by default ModSib compounds registers into Index when possible. +// +void ModSib::Reduce() +{ + // If no index reg, then load the base register into the index slot. + if( Index.IsEmpty() ) + { + Index = Base; + Scale = 0; + Base = x86IndexReg::Empty; + return; + } + + // The Scale has a series of valid forms, all shown here: + + switch( Scale ) + { + case 0: break; + case 1: Scale = 0; break; + case 2: Scale = 1; break; + + case 3: // becomes [reg*2+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 1; + break; + + case 4: Scale = 2; break; + + case 5: // becomes [reg*4+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 2; + break; + + case 6: // invalid! + assert( false ); + break; + + case 7: // so invalid! + assert( false ); + break; + + case 8: Scale = 3; break; + case 9: // becomes [reg*8+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 3; + break; + } +} + +ModSib::ModSib( const x86ModRm& src ) : + Base( src.Base ), + Index( src.Index ), + Scale( src.Factor ), + Displacement( src.Displacement ) +{ + Reduce(); +} + +ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) : + Base( base ), + Index( index ), + Scale( scale ), + Displacement( displacement ) +{ + Reduce(); +} + +ModSib::ModSib( s32 displacement ) : + Base(), + Index(), + Scale(0), + Displacement( displacement ) +{ +} + +// ------------------------------------------------------------------------ +// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the +// instruction ca be encoded as ModRm alone. +bool NeedsSibMagic( const ModSib& info ) +{ + // no registers? no sibs! 
+ if( info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() ) return true; + + // If index register is ESP, then we need a SIB: + // (the ModSib::Reduce() ensures that stand-alone ESP will be in the + // index position for us) + if( info.Index == esp ) return true; + + return false; +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +void EmitSibMagic( int regfield, const ModSib& info ) +{ + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + if( info.Index.IsEmpty() ) + ModRM( 0, regfield, ModRm_UseDisp32 ); + else + { + if( info.Index == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, info.Index.Id ); + } + } + else + { + // In order to encode "just" index*scale (and no base), we have to encode + // it as a special [index*scale + displacement] form, which is done by + // specifying EBP as the base register and setting the displacement field + // to zero. (same as ModRm w/o SIB form above, basically, except the + // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). 
+ + if( info.Base.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); + displacement_size = 2; + } + else + { + if( info.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); + } + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; + jNO_DEFAULT + } +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info ) +{ + EmitSibMagic( regfield.Id, info ); +} + +template< typename ToReg > +static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) +{ + int displacement_size = (src.Displacement == 0) ? 0 : + ( ( src.IsByteSizeDisp() ) ? 1 : 2 ); + + // See EmitSibMagic for commenting on SIB encoding. + + if( !NeedsSibMagic( src ) ) + { + // LEA Land: means we have either 1-register encoding or just an offset. + // offset is encodable as an immediate MOV, and a register is encodable + // as a register MOV. 
+ + if( src.Index.IsEmpty() ) + { + if( is16bit ) + MOV16ItoR( to.Id, src.Displacement ); + else + MOV32ItoR( to.Id, src.Displacement ); + return; + } + else if( displacement_size == 0 ) + { + if( is16bit ) + MOV16RtoR( to.Id, src.Index.Id ); + else + MOV32RtoR( to.Id, src.Index.Id ); + return; + } + else + { + // note: no need to do ebp+0 check since we encode all 0 displacements as + // register assignments above (via MOV) + + write8( 0x8d ); + ModRM( displacement_size, to.Id, src.Index.Id ); + } + } + else + { + if( src.Base.IsEmpty() ) + { + if( displacement_size == 0 ) + { + // Encode [Index*Scale] as a combination of Mov and Shl. + // This is more efficient because of the bloated format which requires + // a 32 bit displacement. + + if( is16bit ) + { + MOV16RtoR( to.Id, src.Index.Id ); + SHL16ItoR( to.Id, src.Scale ); + } + else + { + MOV32RtoR( to.Id, src.Index.Id ); + SHL32ItoR( to.Id, src.Scale ); + } + return; + } + + write8( 0x8d ); + ModRM( 0, to.Id, ModRm_UseSib ); + SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); + displacement_size = 2; // force 32bit displacement. + } + else + { + if( src.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + write8( 0x8d ); + ModRM( displacement_size, to.Id, ModRm_UseSib ); + SibSB( src.Scale, src.Index.Id, src.Base.Id ); + } + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( src.Displacement ); break; + case 2: write32( src.Displacement ); break; + jNO_DEFAULT + } + +} + +emitterT void LEA32( x86Register32 to, const ModSib& src ) +{ + EmitLeaMagic( to, src ); +} + + +emitterT void LEA16( x86Register16 to, const ModSib& src ) +{ + // fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form? + + write8( 0x66 ); + EmitLeaMagic( to, src ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// Miscellaneous Section! 
+// Various Instructions with no parameter and no special encoding logic. +// +emitterT void RET() { write8( 0xC3 ); } +emitterT void CBW() { write16( 0x9866 ); } +emitterT void CWD() { write8( 0x98 ); } +emitterT void CDQ() { write8( 0x99 ); } +emitterT void CWDE() { write8( 0x98 ); } + +emitterT void LAHF() { write8( 0x9f ); } +emitterT void SAHF() { write8( 0x9e ); } + + +////////////////////////////////////////////////////////////////////////////////////////// +// Push / Pop Emitters +// +// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu +// is running in. So even though these say push32, they would essentially be push64 on +// an x64 build. Should I rename them accordingly? --air +// +// Note: pushad/popad implementations are intentionally left out. The instructions are +// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. + + +emitterT void POP( x86Register32 from ) +{ + write8( 0x58 | from.Id ); +} + +emitterT void POP( const ModSib& from ) +{ + write8( 0x8f ); EmitSibMagic( 0, from ); +} + +emitterT void PUSH( u32 imm ) +{ + write8( 0x68 ); write32( imm ); +} + +emitterT void PUSH( x86Register32 from ) +{ + write8( 0x50 | from.Id ); +} + +emitterT void PUSH( const ModSib& from ) +{ + write8( 0xff ); EmitSibMagic( 6, from ); +} + +// pushes the EFLAGS register onto the stack +emitterT void PUSHFD() { write8( 0x9C ); } +// pops the EFLAGS register from the stack +emitterT void POPFD() { write8( 0x9D ); } + +} diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 51a65d712a..46e2bede35 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -25,108 +25,1503 @@ */ #pragma once -#define _ix86_included_ // used for sanity checks by headers dependent on this one. 
#include "ix86_types.h" -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define emitterT template +#ifdef _MSC_VER +#define __threadlocal __declspec(thread) +#else +#define __threadlocal __thread +#endif -#define MEMADDR(addr, oplen) (addr) +#define MMXONLY(code) code -#define Rex(w,r,x,b) assert(0) -#define RexR(w, reg) assert( !(w || (reg)>=8) ) -#define RexB(w, base) assert( !(w || (base)>=8) ) -#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) -#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ -extern u8 *x86Ptr[EmitterId_Count]; -extern u8 *j8Ptr[32]; -extern u32 *j32Ptr[32]; +#define emitterT __forceinline -emitterT void write8( u8 val ) { - *x86Ptr[I] = (u8)val; - x86Ptr[I]++; -} +extern __threadlocal u8 *x86Ptr; +extern __threadlocal u8 *j8Ptr[32]; +extern __threadlocal u32 *j32Ptr[32]; -emitterT void write16( u16 val ) { - *(u16*)x86Ptr[I] = (u16)val; - x86Ptr[I] += 2; -} - -emitterT void write24( u32 val ) { - *x86Ptr[I]++ = (u8)(val & 0xff); - *x86Ptr[I]++ = (u8)((val >> 8) & 0xff); - *x86Ptr[I]++ = (u8)((val >> 16) & 0xff); -} - -emitterT void write32( u32 val ) { - *(u32*)x86Ptr[I] = val; - x86Ptr[I] += 4; -} - -emitterT void write64( u64 val ){ - *(u64*)x86Ptr[I] = val; - x86Ptr[I] += 8; +template< typename T > +static emitterT void x86write( T val ) +{ + *(T*)x86Ptr = val; + x86Ptr += sizeof(T); } + +static emitterT void write8( u8 val ) +{ + *x86Ptr = val; + x86Ptr++; +} + +static emitterT void write16( u16 val ) +{ + *(u16*)x86Ptr = val; + x86Ptr += 2; +} + +static emitterT void write24( u32 val ) +{ + *x86Ptr++ = (u8)(val & 0xff); + *x86Ptr++ = (u8)((val >> 8) & 0xff); + *x86Ptr++ = (u8)((val >> 16) & 0xff); +} + 
+static emitterT void write32( u32 val ) +{ + *(u32*)x86Ptr = val; + x86Ptr += 4; +} + +static emitterT void write64( u64 val ) +{ + *(u64*)x86Ptr = val; + x86Ptr += 8; +} + //------------------------------------------------------------------ //------------------------------------------------------------------ // jump/align functions //------------------------------------------------------------------ -emitterT u8* ex86SetPtr( u8 *ptr ); -emitterT void ex86SetJ8( u8 *j8 ); -emitterT void ex86SetJ8A( u8 *j8 ); -emitterT void ex86SetJ16( u16 *j16 ); -emitterT void ex86SetJ16A( u16 *j16 ); -emitterT void ex86SetJ32( u32 *j32 ); -emitterT void ex86SetJ32A( u32 *j32 ); -emitterT void ex86Align( int bytes ); -emitterT void ex86AlignExecutable( int align ); +extern u8* x86SetPtr( u8 *ptr ); +extern void x86SetJ8( u8 *j8 ); +extern void x86SetJ8A( u8 *j8 ); +extern void x86SetJ16( u16 *j16 ); +extern void x86SetJ16A( u16 *j16 ); +extern void x86SetJ32( u32 *j32 ); +extern void x86SetJ32A( u32 *j32 ); +extern void x86Align( int bytes ); +extern void x86AlignExecutable( int align ); //------------------------------------------------------------------ -//------------------------------------------------------------------ -// General Emitter Helper functions -//------------------------------------------------------------------ -emitterT void WriteRmOffset(x86IntRegType to, int offset); -emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); -emitterT void ModRM( int mod, int reg, int rm ); -emitterT void SibSB( int ss, int index, int base ); -emitterT void SET8R( int cc, int to ); -emitterT void CMOV32RtoR( int cc, int to, int from ); -emitterT void CMOV32MtoR( int cc, int to, uptr from ); -emitterT u8* J8Rel( int cc, int to ); -emitterT u32* J32Rel( int cc, u32 to ); -emitterT u64 GetCPUTick( void ); -//------------------------------------------------------------------ 
+////////////////////////////////////////////////////////////////////////////////////////// +// New C++ Emitter! +// +// To use it just include the x86Emitter namespace into your file/class/function of choice. -emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ); -emitterT u32* eJMP32( uptr to ); -emitterT u8* eJMP8( u8 to ); -emitterT void eCALL32( u32 to ); -emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset); -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); -emitterT void eNOP( void ); -emitterT void eAND32I8toR( x86IntRegType to, u8 from ); -emitterT void eAND32ItoM( uptr to, u32 from ); -emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void eAND32I8toM( uptr to, u8 from ); -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +namespace x86Emitter +{ + extern void POP( x86Register32 from ); + extern void POP( const ModSib& from ); + + extern void PUSH( u32 imm ); + extern void PUSH( x86Register32 from ); + extern void PUSH( const ModSib& from ); + + extern void LEA32( x86Register32 to, const ModSib& src ); + extern void LEA16( x86Register16 to, const ModSib& src ); + static __forceinline void POP( void* from ) { POP( ptr[from] ); } + static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } + #define DECLARE_GROUP1_OPCODE_HELPER( lwr, bits ) \ + emitterT void lwr##bits( x86Register##bits to, x86Register##bits from ); \ + emitterT void lwr##bits( x86Register##bits to, void* from ); \ + emitterT void lwr##bits( x86Register##bits to, const ModSib& from ); \ + emitterT void lwr##bits( x86Register##bits to, u##bits imm ); \ + emitterT void lwr##bits( const ModSib& to, x86Register##bits from ); \ + emitterT void lwr##bits( void* to, x86Register##bits from ); \ + emitterT void lwr##bits( void* to, u##bits imm ); \ + emitterT void lwr##bits( const ModSib& to, u##bits imm ); + + #define DECLARE_GROUP1_OPCODE( lwr ) \ + 
DECLARE_GROUP1_OPCODE_HELPER( lwr, 32 ) + DECLARE_GROUP1_OPCODE_HELPER( lwr, 16 ) + DECLARE_GROUP1_OPCODE_HELPER( lwr, 8 ) + + DECLARE_GROUP1_OPCODE( ADD ) + DECLARE_GROUP1_OPCODE( CMP ) + DECLARE_GROUP1_OPCODE( OR ) + DECLARE_GROUP1_OPCODE( ADC ) + DECLARE_GROUP1_OPCODE( SBB ) + DECLARE_GROUP1_OPCODE( AND ) + DECLARE_GROUP1_OPCODE( SUB ) + DECLARE_GROUP1_OPCODE( XOR ) + +} + + +extern void CLC( void ); +extern void NOP( void ); + +//////////////////////////////////// +// mov instructions // +//////////////////////////////////// + +// mov r32 to r32 +extern void MOV32RtoR( x86IntRegType to, x86IntRegType from ); +// mov r32 to m32 +extern void MOV32RtoM( uptr to, x86IntRegType from ); +// mov m32 to r32 +extern void MOV32MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r32 +extern void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ); +// mov [r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack +extern void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +extern void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) +extern void FMUL320toR( x86IntRegType src ); +// fdiv ST(src) to fpu reg stack ST(0) +extern void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +extern void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +extern void FDIV320toRP( x86IntRegType src ); + +// fadd m32 to fpu reg stack +extern void FADD32( u32 from ); +// fsub m32 to fpu reg stack +extern void FSUB32( u32 from ); +// fmul m32 to fpu reg stack +extern void FMUL32( u32 from ); +// fdiv m32 to fpu reg stack +extern void FDIV32( u32 from ); +// fcomi st, st( i) +extern void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +extern void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +extern void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +extern void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +extern void FCOM32( u32 
from ); +// fabs fpu reg stack +extern void FABS( void ); +// fsqrt fpu reg stack +extern void FSQRT( void ); +// ftan fpu reg stack +extern void FPATAN( void ); +// fsin fpu reg stack +extern void FSIN( void ); +// fchs fpu reg stack +extern void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +extern void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +extern void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +extern void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +extern void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +extern void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +extern void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +extern void FCMOVNBE32( x86IntRegType from ); +// fcmovnu fpu reg to fpu reg stack +extern void FCMOVNU32( x86IntRegType from ); +extern void FCOMP32( u32 from ); +extern void FNSTSWtoAX( void ); #define MMXONLY(code) code -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -#include "ix86_macros.h" -#include "ix86.inl" -#include "ix86_3dnow.inl" -#include "ix86_fpu.inl" -#include "ix86_mmx.inl" -#include "ix86_sse.inl" +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +extern void MOVQMtoR( x86MMXRegType to, uptr from ); +// movq r64 to m64 +extern void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 ; +extern void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn r64 to r64 +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn r64 to r64 +extern void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +extern 
void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +extern void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +extern void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +extern void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +extern void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +extern void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +extern void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to r64 +extern void PADDBMtoR( x86MMXRegType to, uptr from ); +// paddw r64 to r64 +extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +extern void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +extern void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddd m64 to r64 +extern void PADDDMtoR( x86MMXRegType to, uptr from ); +extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +extern void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) 
+extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +extern void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) +extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +extern void PSRLWItoR( x86MMXRegType to, u8 from ); +extern void PSRLDItoR( x86MMXRegType to, u8 from ); +extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSLLWItoR( x86MMXRegType to, u8 from ); +extern void PSLLDItoR( x86MMXRegType to, u8 from ); +extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSRAWItoR( x86MMXRegType to, u8 from ); +extern void PSRADItoR( x86MMXRegType to, u8 from ); +extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +extern void 
PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); +extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); +extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); +extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +extern void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +//********************* +// SSE instructions * +//********************* +extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void 
SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_CVTPI2PS_MM_to_XMM( 
x86SSERegType to, x86MMXRegType from ); +extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); + +extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); 
+extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDPS_M128_to_XMM( 
x86SSERegType to, uptr from ); +extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE_STMXCSR( uptr from ); +extern void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 Instructions* +//********************* + +extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void 
SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXUB_M128_to_XMM( 
x86SSERegType to, uptr from ); +extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); 
+extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); +extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from); +extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + +//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +extern void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * +//********************************************************************************** +extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern 
void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void 
SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + +//**********************************************************************************/ +//MOVD: Move Qword(64bit) to/from MMX/XMM reg * +//********************************************************************************** +extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +extern void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); + +// SSSE3 + +extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void 
SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); + +// SSE4.1 + +#ifndef _MM_MK_INSERTPS_NDX +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) +#endif + +extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); +extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); +extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +//********************* +// 3DNOW instructions * +//********************* +extern void FEMMS( void ); +extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); +extern void PFADDMtoR( x86IntRegType to, uptr 
from ); +extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFSUBMtoR( x86IntRegType to, uptr from ); +extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMULMtoR( x86IntRegType to, uptr from ); +extern void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPMtoR( x86IntRegType to, uptr from ); +extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PF2IDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMAXMtoR( x86IntRegType to, uptr from ); +extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMINMtoR( x86IntRegType to, uptr from ); +extern void PFMINRtoR( x86IntRegType to, x86IntRegType from ); diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl deleted file mode 100644 index 24e197ffcf..0000000000 --- a/pcsx2/x86/ix86/ix86.inl +++ /dev/null @@ -1,3375 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator -* Copyright (C) 2002-2009 Pcsx2 Team -* -* This program is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 2 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. 
-* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -/* -* ix86 core v0.6.2 -* Authors: linuzappz -* alexey silinov -* goldfinger -* zerofrog(@gmail.com) -* cottonvibes(@gmail.com) -*/ - -#pragma once - -//------------------------------------------------------------------ -// ix86 instructions -//------------------------------------------------------------------ - -#include "PrecompiledHeader.h" -#include "System.h" -#include "ix86.h" - -emitterT void WriteRmOffset(x86IntRegType to, s32 offset) -{ - if( (to&7) == ESP ) { - if( offset == 0 ) { - ModRM( 0, 0, 4 ); - SibSB( 0, ESP, 4 ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, 4 ); - SibSB( 0, ESP, 4 ); - write8(offset); - } - else { - ModRM( 2, 0, 4 ); - SibSB( 0, ESP, 4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, 0, to ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, to ); - write8(offset); - } - else { - ModRM( 2, 0, to ); - write32(offset); - } - } -} - -emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) -{ - if ((from&7) == ESP) { - if( offset == 0 ) { - ModRM( 0, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write8(offset); - } - else { - ModRM( 2, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, to, from ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } - } -} - -emitterT void ModRM( s32 mod, s32 reg, s32 rm ) -{ - write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); -} - -emitterT void SibSB( s32 ss, s32 index, s32 base ) -{ - write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); -} - 
-emitterT void SET8R( int cc, int to ) -{ - RexB(0, to); - write8( 0x0F ); - write8( cc ); - write8( 0xC0 | ( to ) ); -} - -emitterT u8* J8Rel( int cc, int to ) -{ - write8( cc ); - write8( to ); - return (u8*)(x86Ptr[I] - 1); -} - -emitterT u16* J16Rel( int cc, u32 to ) -{ - write16( 0x0F66 ); - write8( cc ); - write16( to ); - return (u16*)( x86Ptr[I] - 2 ); -} - -emitterT u32* J32Rel( int cc, u32 to ) -{ - write8( 0x0F ); - write8( cc ); - write32( to ); - return (u32*)( x86Ptr[I] - 4 ); -} - -emitterT void CMOV32RtoR( int cc, int to, int from ) -{ - RexRB(0, to, from); - write8( 0x0F ); - write8( cc ); - ModRM( 3, to, from ); -} - -emitterT void CMOV32MtoR( int cc, int to, uptr from ) -{ - RexR(0, to); - write8( 0x0F ); - write8( cc ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -//////////////////////////////////////////////////// -emitterT u8* ex86SetPtr( u8* ptr ) -{ - u8* rv= x86Ptr[I]; - if (ptr!=0) - x86Ptr[I] = ptr; - return rv; -} - -//////////////////////////////////////////////////// -emitterT void ex86SetJ8( u8* j8 ) -{ - u32 jump = ( x86Ptr[I] - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - *j8 = (u8)jump; -} - -emitterT void ex86SetJ8A( u8* j8 ) -{ - u32 jump = ( x86Ptr[I] - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - - if( ((uptr)x86Ptr[I]&0xf) > 4 ) { - - uptr newjump = jump + 16-((uptr)x86Ptr[I]&0xf); - - if( newjump <= 0x7f ) { - jump = newjump; - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; - } - } - *j8 = (u8)jump; -} - -emitterT void ex86SetJ16( u16 *j16 ) -{ - // doesn't work - u32 jump = ( x86Ptr[I] - (u8*)j16 ) - 2; - - if ( jump > 0x7fff ) { - Console::Error( "j16 greater than 0x7fff!!" 
); - assert(0); - } - *j16 = (u16)jump; -} - -emitterT void ex86SetJ16A( u16 *j16 ) -{ - if( ((uptr)x86Ptr[I]&0xf) > 4 ) { - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; - } - ex86SetJ16(j16); -} - -//////////////////////////////////////////////////// -emitterT void ex86SetJ32( u32* j32 ) -{ - *j32 = ( x86Ptr[I] - (u8*)j32 ) - 4; -} - -emitterT void ex86SetJ32A( u32* j32 ) -{ - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; - ex86SetJ32(j32); -} - -//////////////////////////////////////////////////// -emitterT void ex86Align( int bytes ) -{ - // forward align - x86Ptr[I] = (u8*)( ( (uptr)x86Ptr[I] + bytes - 1) & ~( bytes - 1 ) ); -} - -//////////////////////////////////////////////////// -// Generates executable code to align to the given alignment (could be useful for the second leg -// of if/else conditionals, which usually fall through a jump target label). -emitterT void ex86AlignExecutable( int align ) -{ - uptr newx86 = ( (uptr)x86Ptr[I] + align - 1) & ~( align - 1 ); - uptr bytes = ( newx86 - (uptr)x86Ptr[I] ); - - switch( bytes ) - { - case 0: break; - - case 1: eNOP(); break; - case 2: eMOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: eNOP(); // falls through to 4... - case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: eNOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; - - default: - { - // for larger alignments, just use a JMP... 
- u8* aligned_target = eJMP8(0); - x86Ptr[I] = (u8*)newx86; - ex86SetJ8( aligned_target ); - } - } - - jASSUME( x86Ptr[0] == (u8*)newx86 ); -} - -/********************/ -/* IX86 intructions */ -/********************/ - -emitterT void eSTC( void ) -{ - write8( 0xF9 ); -} - -emitterT void eCLC( void ) -{ - write8( 0xF8 ); -} - -// NOP 1-byte -emitterT void eNOP( void ) -{ - write8(0x90); -} - - -//////////////////////////////////// -// mov instructions / -//////////////////////////////////// - -/* mov r64 to r64 */ -emitterT void eMOV64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); -} - -/* mov r64 to m64 */ -emitterT void eMOV64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1, from); - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - write32( (u32)MEMADDR(to, 4) ); -} - -/* mov m64 to r64 */ -emitterT void eMOV64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - write32( (u32)MEMADDR(from, 4) ); -} - -/* mov imm32 to m64 */ -emitterT void eMOV64I32toM(uptr to, u32 from ) -{ - Rex(1, 0, 0, 0); - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -// mov imm64 to r64 -emitterT void eMOV64ItoR( x86IntRegType to, u64 from) -{ - RexB(1, to); - write8( 0xB8 | (to & 0x7) ); - write64( from ); -} - -/* mov imm32 to r64 */ -emitterT void eMOV64I32toR( x86IntRegType to, s32 from ) -{ - RexB(1, to); - write8( 0xC7 ); - ModRM( 0, 0, to ); - write32( from ); -} - -// mov imm64 to [r64+off] -emitterT void eMOV64ItoRmOffset( x86IntRegType to, u32 from, int offset) -{ - RexB(1,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write32(from); -} - -// mov [r64+offset] to r64 -emitterT void eMOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(1, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -/* mov [r64][r64*scale] to r64 */ -emitterT void eMOV64RmStoR( x86IntRegType to, 
x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(1, to, from2, from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -/* mov r64 to [r64+offset] */ -emitterT void eMOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(1,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); -} - -/* mov r64 to [r64][r64*scale] */ -emitterT void eMOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(1, to, from2, from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - - -/* mov r32 to r32 */ -emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); -} - -/* mov r32 to m32 */ -emitterT void eMOV32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0, from); - if (from == EAX) { - write8(0xA3); - } else { - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - } - write32( MEMADDR(to, 4) ); -} - -/* mov m32 to r32 */ -emitterT void eMOV32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0, to); - if (to == EAX) { - write8(0xA1); - } else { - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - } - write32( MEMADDR(from, 4) ); -} - -/* mov [r32] to r32 */ -emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from ) { - RexRB(0, to, from); - write8(0x8B); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) { - RexRB(0, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -/* mov [r32+r32*scale] to r32 */ -emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(0,to,from2,from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -// mov r32 to [r32<( 0x8B ); - ModRM( 0, to, 0x4 ); - ModRM( scale, from1, 5); - write32(from2); -} - -/* mov r32 to [r32] */ -emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from ) { - 
RexRB(0, from, to); - if ((to&7) == ESP) { - write8( 0x89 ); - ModRM( 0, from, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - } - else { - write8( 0x89 ); - ModRM( 0, from, to ); - } -} - -/* mov r32 to [r32][r32*scale] */ -emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(0, to, from2, from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -/* mov imm32 to r32 */ -emitterT void eMOV32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0, to); - write8( 0xB8 | (to & 0x7) ); - write32( from ); -} - -/* mov imm32 to m32 */ -emitterT void eMOV32ItoM(uptr to, u32 from ) -{ - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -// mov imm32 to [r32+off] -emitterT void eMOV32ItoRmOffset( x86IntRegType to, u32 from, int offset) -{ - RexB(0,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write32(from); -} - -// mov r32 to [r32+off] -emitterT void eMOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) -{ - RexRB(0,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); -} - -/* mov r16 to m16 */ -emitterT void eMOV16RtoM(uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* mov m16 to r16 */ -emitterT void eMOV16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from) -{ - write8( 0x66 ); - RexRB(0,to,from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - write8( 0x66 ); - RexRB(0,to,from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ) -{ - write8(0x66); - RexRXB(0,to,from1,0); 
- write8( 0x8B ); - ModRM( 0, to, SIB ); - SibSB( scale, from1, SIBDISP); - write32(from2); -} - -emitterT void eMOV16RtoRm(x86IntRegType to, x86IntRegType from) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x89 ); - ModRM( 0, from, to ); -} - -/* mov imm16 to m16 */ -emitterT void eMOV16ItoM( uptr to, u16 from ) -{ - write8( 0x66 ); - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* mov r16 to [r32][r32*scale] */ -emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - write8( 0x66 ); - RexRXB(0,to,from2,from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -emitterT void eMOV16ItoR( x86IntRegType to, u16 from ) -{ - RexB(0, to); - write16( 0xB866 | ((to & 0x7)<<8) ); - write16( from ); -} - -// mov imm16 to [r16+off] -emitterT void eMOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset) -{ - write8(0x66); - RexB(0,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write16(from); -} - -// mov r16 to [r16+off] -emitterT void eMOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) -{ - write8(0x66); - RexRB(0,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); -} - -/* mov r8 to m8 */ -emitterT void eMOV8RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x88 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* mov m8 to r8 */ -emitterT void eMOV8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x8A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* mov [r32] to r8 */ -emitterT void eMOV8RmtoR(x86IntRegType to, x86IntRegType from) -{ - RexRB(0,to,from); - write8( 0x8A ); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset) -{ - RexRB(0,to,from); - write8( 0x8A ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, 
u32 from2, int scale ) -{ - RexRXB(0,to,from1,0); - write8( 0x8A ); - ModRM( 0, to, SIB ); - SibSB( scale, from1, SIBDISP); - write32(from2); -} - -emitterT void eMOV8RtoRm(x86IntRegType to, x86IntRegType from) -{ - RexRB(0,from,to); - write8( 0x88 ); - WriteRmOffsetFrom(from, to, 0); -} - -/* mov imm8 to m8 */ -emitterT void eMOV8ItoM( uptr to, u8 from ) -{ - write8( 0xC6 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// mov imm8 to r8 -emitterT void eMOV8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0, to); - write8( 0xB0 | (to & 0x7) ); - write8( from ); -} - -// mov imm8 to [r8+off] -emitterT void eMOV8ItoRmOffset( x86IntRegType to, u8 from, int offset) -{ - assert( to != ESP ); - RexB(0,to); - write8( 0xC6 ); - WriteRmOffset(to,offset); - write8(from); -} - -// mov r8 to [r8+off] -emitterT void eMOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) -{ - assert( to != ESP ); - RexRB(0,from,to); - write8( 0x88 ); - WriteRmOffsetFrom(from,to,offset); -} - -/* movsx r8 to r32 */ -emitterT void eMOVSX32R8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 3, to, from ); -} - -emitterT void eMOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movsx m8 to r32 */ -emitterT void eMOVSX32M8toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xBE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movsx r16 to r32 */ -emitterT void eMOVSX32R16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 3, to, from ); -} - -emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 0, to, from ); -} - 
-emitterT void eMOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movsx m16 to r32 */ -emitterT void eMOVSX32M16toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xBF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movzx r8 to r32 */ -emitterT void eMOVZX32R8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 3, to, from ); -} - -emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movzx m8 to r32 */ -emitterT void eMOVZX32M8toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xB60F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movzx r16 to r32 */ -emitterT void eMOVZX32R16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 3, to, from ); -} - -emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movzx m16 to r32 */ -emitterT void eMOVZX32M16toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xB70F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* cmovbe r32 to r32 */ -emitterT void eCMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x46, to, from ); -} - -/* cmovbe m32 to r32*/ -emitterT void eCMOVBE32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x46, to, from ); -} - -/* cmovb r32 to r32 */ -emitterT void 
eCMOVB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x42, to, from ); -} - -/* cmovb m32 to r32*/ -emitterT void eCMOVB32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x42, to, from ); -} - -/* cmovae r32 to r32 */ -emitterT void eCMOVAE32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x43, to, from ); -} - -/* cmovae m32 to r32*/ -emitterT void eCMOVAE32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x43, to, from ); -} - -/* cmova r32 to r32 */ -emitterT void eCMOVA32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x47, to, from ); -} - -/* cmova m32 to r32*/ -emitterT void eCMOVA32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x47, to, from ); -} - -/* cmovo r32 to r32 */ -emitterT void eCMOVO32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x40, to, from ); -} - -/* cmovo m32 to r32 */ -emitterT void eCMOVO32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x40, to, from ); -} - -/* cmovp r32 to r32 */ -emitterT void eCMOVP32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x4A, to, from ); -} - -/* cmovp m32 to r32 */ -emitterT void eCMOVP32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x4A, to, from ); -} - -/* cmovs r32 to r32 */ -emitterT void eCMOVS32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x48, to, from ); -} - -/* cmovs m32 to r32 */ -emitterT void eCMOVS32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x48, to, from ); -} - -/* cmovno r32 to r32 */ -emitterT void eCMOVNO32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x41, to, from ); -} - -/* cmovno m32 to r32 */ -emitterT void eCMOVNO32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x41, to, from ); -} - -/* cmovnp r32 to r32 */ -emitterT void eCMOVNP32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x4B, to, from ); -} - -/* cmovnp m32 to r32 */ -emitterT void eCMOVNP32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x4B, to, 
from ); -} - -/* cmovns r32 to r32 */ -emitterT void eCMOVNS32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x49, to, from ); -} - -/* cmovns m32 to r32 */ -emitterT void eCMOVNS32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x49, to, from ); -} - -/* cmovne r32 to r32 */ -emitterT void eCMOVNE32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x45, to, from ); -} - -/* cmovne m32 to r32*/ -emitterT void eCMOVNE32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x45, to, from ); -} - -/* cmove r32 to r32*/ -emitterT void eCMOVE32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x44, to, from ); -} - -/* cmove m32 to r32*/ -emitterT void eCMOVE32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x44, to, from ); -} - -/* cmovg r32 to r32*/ -emitterT void eCMOVG32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x4F, to, from ); -} - -/* cmovg m32 to r32*/ -emitterT void eCMOVG32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x4F, to, from ); -} - -/* cmovge r32 to r32*/ -emitterT void eCMOVGE32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x4D, to, from ); -} - -/* cmovge m32 to r32*/ -emitterT void eCMOVGE32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x4D, to, from ); -} - -/* cmovl r32 to r32*/ -emitterT void eCMOVL32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x4C, to, from ); -} - -/* cmovl m32 to r32*/ -emitterT void eCMOVL32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x4C, to, from ); -} - -/* cmovle r32 to r32*/ -emitterT void eCMOVLE32RtoR( x86IntRegType to, x86IntRegType from ) -{ - CMOV32RtoR( 0x4E, to, from ); -} - -/* cmovle m32 to r32*/ -emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) -{ - CMOV32MtoR( 0x4E, to, from ); -} - -//////////////////////////////////// -// arithmetic instructions / -//////////////////////////////////// - -/* add imm32 to r64 */ -emitterT void eADD64ItoR( x86IntRegType to, u32 from ) -{ - 
Rex(1, 0, 0, to >> 3); - if ( to == EAX) { - write8( 0x05 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - } - write32( from ); -} - -/* add m64 to r64 */ -emitterT void eADD64MtoR( x86IntRegType to, uptr from ) -{ - Rex(1, to >> 3, 0, 0); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* add r64 to r64 */ -emitterT void eADD64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x01 ); - ModRM( 3, from, to ); -} - -/* add imm32 to EAX */ -emitterT void eADD32ItoEAX( s32 imm ) -{ - write8( 0x05 ); - write32( imm ); -} - -/* add imm32 to r32 */ -emitterT void eADD32ItoR( x86IntRegType to, s32 imm ) -{ - RexB(0, to); - if (imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8( (s8)imm ); - } - else - { - if ( to == EAX ) { - eADD32ItoEAX(imm); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - write32( imm ); - } - } -} - -/* add imm32 to m32 */ -emitterT void eADD32ItoM( uptr to, s32 imm ) -{ - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write8( imm ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( imm ); - } -} - -// add imm32 to [r32+off] -emitterT void eADD32ItoRmOffset( x86IntRegType to, s32 imm, s32 offset) -{ - RexB(0,to); - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - WriteRmOffset(to,offset); - write8(imm); - } - else - { - write8( 0x81 ); - WriteRmOffset(to,offset); - write32(imm); - } -} - -/* add r32 to r32 */ -emitterT void eADD32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x01 ); - ModRM( 3, from, to ); -} - -/* add r32 to m32 */ -emitterT void eADD32RtoM(uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* add m32 to r32 */ -emitterT void eADD32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x03 ); - 
ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// add r16 to r16 -emitterT void eADD16RtoR( x86IntRegType to , x86IntRegType from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x03 ); - ModRM( 3, to, from ); -} - -/* add imm16 to r16 */ -emitterT void eADD16ItoR( x86IntRegType to, s16 imm ) -{ - write8( 0x66 ); - RexB(0,to); - - if ( to == EAX) - { - write8( 0x05 ); - write16( imm ); - } - else if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8((u8)imm ); - } - else - { - write8( 0x81 ); - ModRM( 3, 0, to ); - write16( imm ); - } -} - -/* add imm16 to m16 */ -emitterT void eADD16ItoM( uptr to, s16 imm ) -{ - write8( 0x66 ); - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)imm ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( imm ); - } -} - -/* add r16 to m16 */ -emitterT void eADD16RtoM(uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* add m16 to r16 */ -emitterT void eADD16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// add m8 to r8 -emitterT void eADD8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x02 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* adc imm32 to r32 */ -emitterT void eADC32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x15 ); - } - else { - write8( 0x81 ); - ModRM( 3, 2, to ); - } - write32( from ); -} - -/* adc imm32 to m32 */ -emitterT void eADC32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* adc r32 to r32 */ -emitterT void eADC32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x11 ); - 
ModRM( 3, from, to ); -} - -/* adc m32 to r32 */ -emitterT void eADC32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x13 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// adc r32 to m32 -emitterT void eADC32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x11 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* inc r32 */ -emitterT void eINC32R( x86IntRegType to ) -{ - write8( 0x40 + to ); -} - -/* inc m32 */ -emitterT void eINC32M( u32 to ) -{ - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* inc r16 */ -emitterT void eINC16R( x86IntRegType to ) -{ - write8( 0x66 ); - write8( 0x40 + to ); -} - -/* inc m16 */ -emitterT void eINC16M( u32 to ) -{ - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); -} - - -/* sub imm32 to r64 */ -emitterT void eSUB64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write32( from ); -} - -/* sub r64 to r64 */ -emitterT void eSUB64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x29 ); - ModRM( 3, from, to ); -} - -/* sub m64 to r64 */ -emitterT void eSUB64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sub imm32 to r32 */ -emitterT void eSUB32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write32( from ); -} - -/* sub imm32 to m32 */ -emitterT void eSUB32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* sub r32 to r32 */ -emitterT void eSUB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x29 ); - ModRM( 3, from, to ); -} - -/* sub m32 to r32 */ -emitterT void eSUB32MtoR( 
x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// sub r32 to m32 -emitterT void eSUB32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x29 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -// sub r16 to r16 -emitterT void eSUB16RtoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x2b ); - ModRM( 3, to, from ); -} - -/* sub imm16 to r16 */ -emitterT void eSUB16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); - RexB(0,to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write16( from ); -} - -/* sub imm16 to m16 */ -emitterT void eSUB16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* sub m16 to r16 */ -emitterT void eSUB16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); - RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sbb r64 to r64 */ -emitterT void eSBB64RtoR( x86IntRegType to, x86IntRegType from ) { - RexRB(1, from,to); - write8( 0x19 ); - ModRM( 3, from, to ); -} - -/* sbb imm32 to r32 */ -emitterT void eSBB32ItoR( x86IntRegType to, u32 from ) { - RexB(0,to); - if ( to == EAX ) { - write8( 0x1D ); - } - else { - write8( 0x81 ); - ModRM( 3, 3, to ); - } - write32( from ); -} - -/* sbb imm32 to m32 */ -emitterT void eSBB32ItoM( uptr to, u32 from ) { - write8( 0x81 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* sbb r32 to r32 */ -emitterT void eSBB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x19 ); - ModRM( 3, from, to ); -} - -/* sbb m32 to r32 */ -emitterT void eSBB32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x1B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sbb r32 to m32 */ -emitterT void 
eSBB32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x19 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* dec r32 */ -emitterT void eDEC32R( x86IntRegType to ) -{ - write8( 0x48 + to ); -} - -/* dec m32 */ -emitterT void eDEC32M( u32 to ) -{ - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* dec r16 */ -emitterT void eDEC16R( x86IntRegType to ) -{ - write8( 0x66 ); - write8( 0x48 + to ); -} - -/* dec m16 */ -emitterT void eDEC16M( u32 to ) -{ - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* mul eax by r32 to edx:eax */ -emitterT void eMUL32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 4, from ); -} - -/* imul eax by r32 to edx:eax */ -emitterT void eIMUL32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 5, from ); -} - -/* mul eax by m32 to edx:eax */ -emitterT void eMUL32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* imul eax by m32 to edx:eax */ -emitterT void eIMUL32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* imul r32 by r32 to r32 */ -emitterT void eIMUL32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xAF0F ); - ModRM( 3, to, from ); -} - -/* div eax by r32 to edx:eax */ -emitterT void eDIV32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 6, from ); -} - -/* idiv eax by r32 to edx:eax */ -emitterT void eIDIV32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 7, from ); -} - -/* div eax by m32 to edx:eax */ -emitterT void eDIV32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* idiv eax by m32 to edx:eax */ -emitterT void eIDIV32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -//////////////////////////////////// 
-// shifting instructions / -//////////////////////////////////// - -/* shl imm8 to r64 */ -emitterT void eSHL64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1, to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 4, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); -} - -/* shl cl to r64 */ -emitterT void eSHL64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 4, to ); -} - -/* shr imm8 to r64 */ -emitterT void eSHR64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1,to); - if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 5, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); -} - -/* shr cl to r64 */ -emitterT void eSHR64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 5, to ); -} - -/* shl imm8 to r32 */ -emitterT void eSHL32ItoR( x86IntRegType to, u8 from ) -{ - RexB(0, to); - if ( from == 1 ) - { - write8( 0xD1 ); - write8( 0xE0 | (to & 0x7) ); - return; - } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); -} - -/* shl imm8 to m32 */ -emitterT void eSHL32ItoM( uptr to, u8 from ) -{ - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 4) ); - } - else - { - write8( 0xC1 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); - } -} - -/* shl cl to r32 */ -emitterT void eSHL32CLtoR( x86IntRegType to ) -{ - RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 4, to ); -} - -// shl imm8 to r16 -emitterT void eSHL16ItoR( x86IntRegType to, u8 from ) -{ - write8(0x66); - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD1 ); - write8( 0xE0 | (to & 0x7) ); - return; - } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); -} - -// shl imm8 to r8 -emitterT void eSHL8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD0 ); - write8( 0xE0 | (to & 0x7) ); - return; - } - write8( 0xC0 ); - ModRM( 3, 4, to ); - write8( from ); -} - -/* shr imm8 to r32 */ -emitterT void eSHR32ItoR( 
x86IntRegType to, u8 from ) { - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD1 ); - write8( 0xE8 | (to & 0x7) ); - } - else - { - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); - } -} - -/* shr imm8 to m32 */ -emitterT void eSHR32ItoM( uptr to, u8 from ) -{ - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 4) ); - } - else - { - write8( 0xC1 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); - } -} - -/* shr cl to r32 */ -emitterT void eSHR32CLtoR( x86IntRegType to ) -{ - RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 5, to ); -} - -// shr imm8 to r16 -emitterT void eSHR16ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 5, to ); - } - else - { - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); - } -} - -// shr imm8 to r8 -emitterT void eSHR8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD0 ); - write8( 0xE8 | (to & 0x7) ); - } - else - { - write8( 0xC0 ); - ModRM( 3, 5, to ); - write8( from ); - } -} - -/* sar imm8 to r64 */ -emitterT void eSAR64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1,to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 7, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); -} - -/* sar cl to r64 */ -emitterT void eSAR64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 7, to ); -} - -/* sar imm8 to r32 */ -emitterT void eSAR32ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 7, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); -} - -/* sar imm8 to m32 */ -emitterT void eSAR32ItoM( uptr to, u8 from ) -{ - write8( 0xC1 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* sar cl to r32 */ -emitterT void eSAR32CLtoR( x86IntRegType to ) -{ - RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 7, to ); -} - -// sar imm8 to r16 
-emitterT void eSAR16ItoR( x86IntRegType to, u8 from ) -{ - write8(0x66); - RexB(0,to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 7, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); -} - -emitterT void eROR32ItoR( x86IntRegType to,u8 from ) -{ - RexB(0,to); - if ( from == 1 ) { - write8( 0xd1 ); - write8( 0xc8 | to ); - } - else - { - write8( 0xc1 ); - write8( 0xc8 | to ); - write8( from ); - } -} - -emitterT void eRCR32ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( from == 1 ) { - write8( 0xd1 ); - ModRM(3, 3, to); - } - else - { - write8( 0xc1 ); - ModRM(3, 3, to); - write8( from ); - } -} - -emitterT void eRCR32ItoM( uptr to, u8 from ) -{ - RexB(0,to); - if ( from == 1 ) { - write8( 0xd1 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - } - else - { - write8( 0xc1 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - write8( from ); - } -} - -// shld imm8 to r32 -emitterT void eSHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) -{ - RexRB(0,from,to); - write8( 0x0F ); - write8( 0xA4 ); - ModRM( 3, from, to ); - write8( shift ); -} - -// shrd imm8 to r32 -emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) -{ - RexRB(0,from,to); - write8( 0x0F ); - write8( 0xAC ); - ModRM( 3, from, to ); - write8( shift ); -} - -//////////////////////////////////// -// logical instructions / -//////////////////////////////////// - -/* or imm32 to r32 */ -emitterT void eOR64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write32( from ); -} - -/* or m64 to r64 */ -emitterT void eOR64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* or r64 to r64 */ -emitterT void eOR64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -// or r32 to 
m64 -emitterT void eOR64RtoM(uptr to, x86IntRegType from ) -{ - RexR(1,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* or imm32 to r32 */ -emitterT void eOR32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write32( from ); -} - -/* or imm32 to m32 */ -emitterT void eOR32ItoM(uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* or r32 to r32 */ -emitterT void eOR32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -/* or r32 to m32 */ -emitterT void eOR32RtoM(uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* or m32 to r32 */ -emitterT void eOR32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// or r16 to r16 -emitterT void eOR16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -// or imm16 to r16 -emitterT void eOR16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write16( from ); -} - -// or imm16 to m316 -emitterT void eOR16ItoM( uptr to, u16 from ) -{ - write8(0x66); - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* or m16 to r16 */ -emitterT void eOR16MtoR( x86IntRegType to, uptr from ) -{ - write8(0x66); - RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// or r16 to m16 -emitterT void eOR16RtoM( uptr to, x86IntRegType from ) -{ - write8(0x66); - RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} 
- -// or r8 to r8 -emitterT void eOR8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x08 ); - ModRM( 3, from, to ); -} - -// or r8 to m8 -emitterT void eOR8RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x08 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -// or imm8 to m8 -emitterT void eOR8ItoM( uptr to, u8 from ) -{ - write8( 0x80 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// or m8 to r8 -emitterT void eOR8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x0A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* xor imm32 to r64 */ -emitterT void eXOR64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1,to); - if ( to == EAX ) { - write8( 0x35 ); - } else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write32( from ); -} - -/* xor r64 to r64 */ -emitterT void eXOR64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor m64 to r64 */ -emitterT void eXOR64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x33 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* xor r64 to m64 */ -emitterT void eXOR64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* xor imm32 to r32 */ -emitterT void eXOR32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x35 ); - } - else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write32( from ); -} - -/* xor imm32 to m32 */ -emitterT void eXOR32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* xor r32 to r32 */ -emitterT void eXOR32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor r16 to r16 */ -emitterT void eXOR16RtoR( x86IntRegType to, 
x86IntRegType from ) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor r32 to m32 */ -emitterT void eXOR32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* xor m32 to r32 */ -emitterT void eXOR32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x33 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// xor imm16 to r16 -emitterT void eXOR16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) { - write8( 0x35 ); - } - else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write16( from ); -} - -// xor r16 to m16 -emitterT void eXOR16RtoM( uptr to, x86IntRegType from ) -{ - write8(0x66); - RexR(0,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and imm32 to r64 */ -emitterT void eAND64I32toR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x25 ); - } else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); -} - -/* and m64 to r64 */ -emitterT void eAND64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* and r64 to m64 */ -emitterT void eAND64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1, from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and r64 to r64 */ -emitterT void eAND64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x21 ); - ModRM( 3, from, to ); -} - -/* and imm32 to m64 */ -emitterT void eAND64I32toM( uptr to, u32 from ) -{ - Rex(1,0,0,0); - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* and imm32 to r32 */ -emitterT void eAND32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if(from < 0x80) { - eAND32I8toR(to, (u8)from); - } - else { - if ( to == EAX ) { - write8( 
0x25 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); - } -} - -/* and sign ext imm8 to r32 */ -emitterT void eAND32I8toR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8( from ); -} - -/* and imm32 to m32 */ -emitterT void eAND32ItoM( uptr to, u32 from ) -{ - if(from < 0x80) { - eAND32I8toM(to, (u8)from); - } - else { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); - } -} - -/* bts imm32 to r32 */ -emitterT void eBTS32MtoR( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0xf ); - write8( 0xab ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and sign ext imm8 to m32 */ -emitterT void eAND32I8toM( uptr to, u8 from ) -{ - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* and r32 to r32 */ -emitterT void eAND32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x21 ); - ModRM( 3, from, to ); -} - -/* and r32 to m32 */ -emitterT void eAND32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m32 to r32 */ -emitterT void eAND32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// Warning: Untested form of AND. -emitterT void eAND32RmtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write8( 0x23 ); - ModRM( 0, to, from ); -} - -// Warning: Untested form of AND. 
-emitterT void eAND32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0x23 ); - WriteRmOffsetFrom(to,from,offset); -} - -// and r16 to r16 -emitterT void eAND16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x23 ); - ModRM( 3, to, from ); -} - -/* and imm16 to r16 */ -emitterT void eAND16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - - if ( to == EAX ) { - write8( 0x25 ); - write16( from ); - } - else if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8((u8)from ); - } - else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - write16( from ); - } -} - -/* and imm16 to m16 */ -emitterT void eAND16ItoM( uptr to, u16 from ) -{ - write8(0x66); - if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)from ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); - - } -} - -/* and r16 to m16 */ -emitterT void eAND16RtoM( uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m16 to r16 */ -emitterT void eAND16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); -} - -/* and imm8 to r8 */ -emitterT void eAND8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x24 ); - } - else { - write8( 0x80 ); - ModRM( 3, 0x4, to ); - } - write8( from ); -} - -/* and imm8 to m8 */ -emitterT void eAND8ItoM( uptr to, u8 from ) -{ - write8( 0x80 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// and r8 to r8 -emitterT void eAND8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write8( 0x22 ); - ModRM( 3, to, from ); -} - -/* and r8 to m8 */ -emitterT void eAND8RtoM( uptr to, 
x86IntRegType from ) -{ - RexR(0,from); - write8( 0x20 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m8 to r8 */ -emitterT void eAND8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x22 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); -} - -/* not r64 */ -emitterT void eNOT64R( x86IntRegType from ) -{ - RexB(1, from); - write8( 0xF7 ); - ModRM( 3, 2, from ); -} - -/* not r32 */ -emitterT void eNOT32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 2, from ); -} - -// not m32 -emitterT void eNOT32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(from, 4)); -} - -/* neg r64 */ -emitterT void eNEG64R( x86IntRegType from ) -{ - RexB(1, from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - -/* neg r32 */ -emitterT void eNEG32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - -emitterT void eNEG32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(from, 4)); -} - -/* neg r16 */ -emitterT void eNEG16R( x86IntRegType from ) -{ - write8( 0x66 ); - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - -//////////////////////////////////// -// jump instructions / -//////////////////////////////////// - -emitterT u8* JMP( uptr to ) { - uptr jump = ( x86Ptr[0] - (u8*)to ) - 1; - - if ( jump > 0x7f ) { - assert( to <= 0xffffffff ); - return (u8*)eJMP32( to ); - } - else { - return (u8*)eJMP8( to ); - } -} - -/* jmp rel8 */ -emitterT u8* eJMP8( u8 to ) -{ - write8( 0xEB ); - write8( to ); - return x86Ptr[I] - 1; -} - -/* jmp rel32 */ -emitterT u32* eJMP32( uptr to ) -{ - assert( (sptr)to <= 0x7fffffff && (sptr)to >= -0x7fffffff ); - write8( 0xE9 ); - write32( to ); - return (u32*)(x86Ptr[I] - 4 ); -} - -/* jmp r32/r64 */ -emitterT void eJMPR( x86IntRegType to ) -{ - RexB(0, to); - write8( 0xFF ); - ModRM( 3, 4, to ); -} - -// jmp m32 -emitterT void eJMP32M( uptr to ) -{ - write8( 0xFF ); - ModRM( 0, 4, DISP32 
); - write32( MEMADDR(to, 4)); -} - -/* jp rel8 */ -emitterT u8* eJP8( u8 to ) { - return J8Rel( 0x7A, to ); -} - -/* jnp rel8 */ -emitterT u8* eJNP8( u8 to ) { - return J8Rel( 0x7B, to ); -} - -/* je rel8 */ -emitterT u8* eJE8( u8 to ) { - return J8Rel( 0x74, to ); -} - -/* jz rel8 */ -emitterT u8* eJZ8( u8 to ) -{ - return J8Rel( 0x74, to ); -} - -/* js rel8 */ -emitterT u8* eJS8( u8 to ) -{ - return J8Rel( 0x78, to ); -} - -/* jns rel8 */ -emitterT u8* eJNS8( u8 to ) -{ - return J8Rel( 0x79, to ); -} - -/* jg rel8 */ -emitterT u8* eJG8( u8 to ) -{ - return J8Rel( 0x7F, to ); -} - -/* jge rel8 */ -emitterT u8* eJGE8( u8 to ) -{ - return J8Rel( 0x7D, to ); -} - -/* jl rel8 */ -emitterT u8* eJL8( u8 to ) -{ - return J8Rel( 0x7C, to ); -} - -/* ja rel8 */ -emitterT u8* eJA8( u8 to ) -{ - return J8Rel( 0x77, to ); -} - -emitterT u8* eJAE8( u8 to ) -{ - return J8Rel( 0x73, to ); -} - -/* jb rel8 */ -emitterT u8* eJB8( u8 to ) -{ - return J8Rel( 0x72, to ); -} - -/* jbe rel8 */ -emitterT u8* eJBE8( u8 to ) -{ - return J8Rel( 0x76, to ); -} - -/* jle rel8 */ -emitterT u8* eJLE8( u8 to ) -{ - return J8Rel( 0x7E, to ); -} - -/* jne rel8 */ -emitterT u8* eJNE8( u8 to ) -{ - return J8Rel( 0x75, to ); -} - -/* jnz rel8 */ -emitterT u8* eJNZ8( u8 to ) -{ - return J8Rel( 0x75, to ); -} - -/* jng rel8 */ -emitterT u8* eJNG8( u8 to ) -{ - return J8Rel( 0x7E, to ); -} - -/* jnge rel8 */ -emitterT u8* eJNGE8( u8 to ) -{ - return J8Rel( 0x7C, to ); -} - -/* jnl rel8 */ -emitterT u8* eJNL8( u8 to ) -{ - return J8Rel( 0x7D, to ); -} - -/* jnle rel8 */ -emitterT u8* eJNLE8( u8 to ) -{ - return J8Rel( 0x7F, to ); -} - -/* jo rel8 */ -emitterT u8* eJO8( u8 to ) -{ - return J8Rel( 0x70, to ); -} - -/* jno rel8 */ -emitterT u8* eJNO8( u8 to ) -{ - return J8Rel( 0x71, to ); -} -/* Untested and slower, use 32bit versions instead -// ja rel16 -emitterT u16* eJA16( u16 to ) -{ -return J16Rel( 0x87, to ); -} - -// jb rel16 -emitterT u16* eJB16( u16 to ) -{ -return J16Rel( 0x82, to ); -} - -// 
je rel16 -emitterT u16* eJE16( u16 to ) -{ -return J16Rel( 0x84, to ); -} - -// jz rel16 -emitterT u16* eJZ16( u16 to ) -{ -return J16Rel( 0x84, to ); -} -*/ -// jb rel32 -emitterT u32* eJB32( u32 to ) -{ - return J32Rel( 0x82, to ); -} - -/* je rel32 */ -emitterT u32* eJE32( u32 to ) -{ - return J32Rel( 0x84, to ); -} - -/* jz rel32 */ -emitterT u32* eJZ32( u32 to ) -{ - return J32Rel( 0x84, to ); -} - -/* js rel32 */ -emitterT u32* eJS32( u32 to ) -{ - return J32Rel( 0x88, to ); -} - -/* jns rel32 */ -emitterT u32* eJNS32( u32 to ) -{ - return J32Rel( 0x89, to ); -} - -/* jg rel32 */ -emitterT u32* eJG32( u32 to ) -{ - return J32Rel( 0x8F, to ); -} - -/* jge rel32 */ -emitterT u32* eJGE32( u32 to ) -{ - return J32Rel( 0x8D, to ); -} - -/* jl rel32 */ -emitterT u32* eJL32( u32 to ) -{ - return J32Rel( 0x8C, to ); -} - -/* jle rel32 */ -emitterT u32* eJLE32( u32 to ) -{ - return J32Rel( 0x8E, to ); -} - -/* ja rel32 */ -emitterT u32* eJA32( u32 to ) -{ - return J32Rel( 0x87, to ); -} - -/* jae rel32 */ -emitterT u32* eJAE32( u32 to ) -{ - return J32Rel( 0x83, to ); -} - -/* jne rel32 */ -emitterT u32* eJNE32( u32 to ) -{ - return J32Rel( 0x85, to ); -} - -/* jnz rel32 */ -emitterT u32* eJNZ32( u32 to ) -{ - return J32Rel( 0x85, to ); -} - -/* jng rel32 */ -emitterT u32* eJNG32( u32 to ) -{ - return J32Rel( 0x8E, to ); -} - -/* jnge rel32 */ -emitterT u32* eJNGE32( u32 to ) -{ - return J32Rel( 0x8C, to ); -} - -/* jnl rel32 */ -emitterT u32* eJNL32( u32 to ) -{ - return J32Rel( 0x8D, to ); -} - -/* jnle rel32 */ -emitterT u32* eJNLE32( u32 to ) -{ - return J32Rel( 0x8F, to ); -} - -/* jo rel32 */ -emitterT u32* eJO32( u32 to ) -{ - return J32Rel( 0x80, to ); -} - -/* jno rel32 */ -emitterT u32* eJNO32( u32 to ) -{ - return J32Rel( 0x81, to ); -} - - - -/* call func */ -emitterT void eCALLFunc( uptr func ) -{ - func -= ( (uptr)x86Ptr[0] + 5 ); - assert( (sptr)func <= 0x7fffffff && (sptr)func >= -0x7fffffff ); - eCALL32(func); -} - -/* call rel32 */ -emitterT void 
eCALL32( u32 to ) -{ - write8( 0xE8 ); - write32( to ); -} - -/* call r32 */ -emitterT void eCALL32R( x86IntRegType to ) -{ - write8( 0xFF ); - ModRM( 3, 2, to ); -} - -/* call r64 */ -emitterT void eCALL64R( x86IntRegType to ) -{ - RexB(0, to); - write8( 0xFF ); - ModRM( 3, 2, to ); -} - -/* call m32 */ -emitterT void eCALL32M( u32 to ) -{ - write8( 0xFF ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -//////////////////////////////////// -// misc instructions / -//////////////////////////////////// - -/* cmp imm32 to r64 */ -emitterT void eCMP64I32toR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x3D ); - } - else { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write32( from ); -} - -/* cmp m64 to r64 */ -emitterT void eCMP64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp r64 to r64 -emitterT void eCMP64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp imm32 to r32 */ -emitterT void eCMP32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x3D ); - } - else { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write32( from ); -} - -/* cmp imm32 to m32 */ -emitterT void eCMP32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* cmp r32 to r32 */ -emitterT void eCMP32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp m32 to r32 */ -emitterT void eCMP32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp imm8 to [r32] -emitterT void eCMP32I8toRm( x86IntRegType to, u8 from) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 0, 7, to ); - write8(from); -} - -// cmp imm32 to [r32+off] -emitterT void 
eCMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 1, 7, to ); - write8(off); - write8(from); -} - -// cmp imm8 to [r32] -emitterT void eCMP32I8toM( uptr to, u8 from) -{ - write8( 0x83 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* cmp imm16 to r16 */ -emitterT void eCMP16ItoR( x86IntRegType to, u16 from ) -{ - write8( 0x66 ); - RexB(0,to); - if ( to == EAX ) - { - write8( 0x3D ); - } - else - { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write16( from ); -} - -/* cmp imm16 to m16 */ -emitterT void eCMP16ItoM( uptr to, u16 from ) -{ - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* cmp r16 to r16 */ -emitterT void eCMP16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp m16 to r16 */ -emitterT void eCMP16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp imm8 to r8 -emitterT void eCMP8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) - { - write8( 0x3C ); - } - else - { - write8( 0x80 ); - ModRM( 3, 7, to ); - } - write8( from ); -} - -// cmp m8 to r8 -emitterT void eCMP8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x3A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* test imm32 to r32 */ -emitterT void eTEST32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) - { - write8( 0xA9 ); - } - else - { - write8( 0xF7 ); - ModRM( 3, 0, to ); - } - write32( from ); -} - -emitterT void eTEST32ItoM( uptr to, u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* test r32 to r32 */ -emitterT void eTEST32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x85 ); - 
ModRM( 3, from, to ); -} - -// test imm32 to [r32] -emitterT void eTEST32ItoRm( x86IntRegType to, u32 from ) -{ - RexB(0,to); - write8( 0xF7 ); - ModRM( 0, 0, to ); - write32(from); -} - -// test imm16 to r16 -emitterT void eTEST16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) - { - write8( 0xA9 ); - } - else - { - write8( 0xF7 ); - ModRM( 3, 0, to ); - } - write16( from ); -} - -// test r16 to r16 -emitterT void eTEST16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,from,to); - write8( 0x85 ); - ModRM( 3, from, to ); -} - -// test r8 to r8 -emitterT void eTEST8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x84 ); - ModRM( 3, from, to ); -} - - -// test imm8 to r8 -emitterT void eTEST8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) - { - write8( 0xA8 ); - } - else - { - write8( 0xF6 ); - ModRM( 3, 0, to ); - } - write8( from ); -} - -// test imm8 to r8 -emitterT void eTEST8ItoM( uptr to, u8 from ) -{ - write8( 0xF6 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* sets r8 */ -emitterT void eSETS8R( x86IntRegType to ) -{ - SET8R( 0x98, to ); -} - -/* setl r8 */ -emitterT void eSETL8R( x86IntRegType to ) -{ - SET8R( 0x9C, to ); -} - -// setge r8 -emitterT void eSETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } -// setg r8 -emitterT void eSETG8R( x86IntRegType to ) { SET8R(0x9f, to); } -// seta r8 -emitterT void eSETA8R( x86IntRegType to ) { SET8R(0x97, to); } -// setae r8 -emitterT void eSETAE8R( x86IntRegType to ) { SET8R(0x99, to); } -/* setb r8 */ -emitterT void eSETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } -/* setb r8 */ -emitterT void eSETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } -// setz r8 -emitterT void eSETZ8R( x86IntRegType to ) { SET8R(0x94, to); } -// sete r8 -emitterT void eSETE8R( x86IntRegType to ) { SET8R(0x94, to); } - -/* push imm32 */ -emitterT void ePUSH32I( u32 from ) -{; -write8( 
0x68 ); -write32( from ); -} - -/* push r32 */ -emitterT void ePUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } - -/* push m32 */ -emitterT void ePUSH32M( u32 from ) -{ - write8( 0xFF ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* pop r32 */ -emitterT void ePOP32R( x86IntRegType from ) { write8( 0x58 | from ); } - -/* pushad */ -emitterT void ePUSHA32( void ) { write8( 0x60 ); } - -/* popad */ -emitterT void ePOPA32( void ) { write8( 0x61 ); } - -emitterT void ePUSHR(x86IntRegType from) { ePUSH32R(from); } -emitterT void ePOPR(x86IntRegType from) { ePOP32R(from); } - - -/* pushfd */ -emitterT void ePUSHFD( void ) { write8( 0x9C ); } -/* popfd */ -emitterT void ePOPFD( void ) { write8( 0x9D ); } - -emitterT void eRET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); } - -emitterT void eCBW( void ) { write16( 0x9866 ); } -emitterT void eCWD( void ) { write8( 0x98 ); } -emitterT void eCDQ( void ) { write8( 0x99 ); } -emitterT void eCWDE() { write8(0x98); } - -emitterT void eLAHF() { write8(0x9f); } -emitterT void eSAHF() { write8(0x9e); } - -emitterT void eBT32ItoR( x86IntRegType to, u8 from ) -{ - write16( 0xBA0F ); - ModRM(3, 4, to); - write8( from ); -} - -emitterT void eBTR32ItoR( x86IntRegType to, u8 from ) -{ - write16( 0xBA0F ); - ModRM(3, 6, to); - write8( from ); -} - -emitterT void eBSRRtoR(x86IntRegType to, x86IntRegType from) -{ - write16( 0xBD0F ); - ModRM( 3, from, to ); -} - -emitterT void eBSWAP32R( x86IntRegType to ) -{ - write8( 0x0F ); - write8( 0xC8 + to ); -} - -// to = from + offset -emitterT void eLEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) -{ - write8(0x66); - eLEA32RtoR(to, from, offset); -} - -emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) -{ - RexRB(0,to,from); - write8(0x8d); - - if( (from&7) == ESP ) { - if( offset == 0 ) { - ModRM(1, to, from); - write8(0x24); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM(1, to, from); - write8(0x24); - 
write8(offset); - } - else { - ModRM(2, to, from); - write8(0x24); - write32(offset); - } - } - else { - if( offset == 0 && from != EBP && from!=ESP ) { - ModRM(0, to, from); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM(1, to, from); - write8(offset); - } - else { - ModRM(2, to, from); - write32(offset); - } - } -} - -// to = from0 + from1 -emitterT void eLEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) -{ - write8(0x66); - eLEA32RRtoR(to, from0, from1); -} - -emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) -{ - RexRXB(0, to, from0, from1); - write8(0x8d); - - if( (from1&7) == EBP ) { - ModRM(1, to, 4); - ModRM(0, from0, from1); - write8(0); - } - else { - ModRM(0, to, 4); - ModRM(0, from0, from1); - } -} - -// to = from << scale (max is 3) -emitterT void eLEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) -{ - write8(0x66); - eLEA32RStoR(to, from, scale); -} - -// Don't inline recursive functions -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) -{ - if( to == from ) { - eSHL32ItoR(to, scale); - return; - } - - if( from != ESP ) { - RexRXB(0,to,from,0); - write8(0x8d); - ModRM(0, to, 4); - ModRM(scale, from, 5); - write32(0); - } - else { - assert( to != ESP ); - eMOV32RtoR(to, from); - eLEA32RStoR(to, to, scale); - } -} diff --git a/pcsx2/x86/ix86/ix86_3dnow.cpp b/pcsx2/x86/ix86/ix86_3dnow.cpp new file mode 100644 index 0000000000..ae6743cc3d --- /dev/null +++ b/pcsx2/x86/ix86/ix86_3dnow.cpp @@ -0,0 +1,202 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" +#include "ix86_internal.h" + +//------------------------------------------------------------------ +// 3DNOW instructions +//------------------------------------------------------------------ + +/* femms */ +emitterT void FEMMS( void ) +{ + write16( 0x0E0F ); +} + +emitterT void PFCMPEQMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xB0 ); +} + +emitterT void PFCMPGTMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xA0 ); +} + +emitterT void PFCMPGEMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x90 ); +} + +emitterT void PFADDMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x9E ); +} + +emitterT void PFADDRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9E ); +} + +emitterT void PFSUBMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x9A ); +} + +emitterT void PFSUBRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9A ); +} + +emitterT void PFMULMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xB4 ); +} + +emitterT void PFMULRtoR( x86IntRegType to, 
x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB4 ); +} + +emitterT void PFRCPMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x96 ); +} + +emitterT void PFRCPRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x96 ); +} + +emitterT void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA6 ); +} + +emitterT void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB6 ); +} + +emitterT void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x97 ); +} + +emitterT void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA7 ); +} + +emitterT void PF2IDMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x1D ); +} + +emitterT void PF2IDRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x1D ); +} + +emitterT void PI2FDMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x0D ); +} + +emitterT void PI2FDRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x0D ); +} + +emitterT void PFMAXMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xA4 ); +} + +emitterT void PFMAXRtoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA4 ); +} + +emitterT void PFMINMtoR( x86IntRegType to, uptr from ) +{ + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x94 ); +} + +emitterT void PFMINRtoR( x86IntRegType to, x86IntRegType from ) +{ 
+ write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x94 ); +} diff --git a/pcsx2/x86/ix86/ix86_3dnow.inl b/pcsx2/x86/ix86/ix86_3dnow.inl deleted file mode 100644 index 5fdcce2347..0000000000 --- a/pcsx2/x86/ix86/ix86_3dnow.inl +++ /dev/null @@ -1,201 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -//------------------------------------------------------------------ -// 3DNOW instructions -//------------------------------------------------------------------ - -/* femms */ -emitterT void eFEMMS( void ) -{ - write16( 0x0E0F ); -} - -emitterT void ePFCMPEQMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB0 ); -} - -emitterT void ePFCMPGTMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA0 ); -} - -emitterT void ePFCMPGEMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x90 ); -} - -emitterT void ePFADDMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9E ); -} - -emitterT void ePFADDRtoR( x86IntRegType to, x86IntRegType from ) -{ - 
write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x9E ); -} - -emitterT void ePFSUBMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9A ); -} - -emitterT void ePFSUBRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x9A ); -} - -emitterT void ePFMULMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB4 ); -} - -emitterT void ePFMULRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xB4 ); -} - -emitterT void ePFRCPMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x96 ); -} - -emitterT void ePFRCPRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x96 ); -} - -emitterT void ePFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA6 ); -} - -emitterT void ePFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xB6 ); -} - -emitterT void ePFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x97 ); -} - -emitterT void ePFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA7 ); -} - -emitterT void ePF2IDMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x1D ); -} - -emitterT void ePF2IDRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x1D ); -} - -emitterT void ePI2FDMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x0D ); -} - -emitterT void ePI2FDRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 
0x0F0F ); - ModRM( 3, to, from ); - write8( 0x0D ); -} - -emitterT void ePFMAXMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA4 ); -} - -emitterT void ePFMAXRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA4 ); -} - -emitterT void ePFMINMtoR( x86IntRegType to, uptr from ) -{ - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x94 ); -} - -emitterT void ePFMINRtoR( x86IntRegType to, x86IntRegType from ) -{ - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x94 ); -} diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index bf88feeb32..b1fc04a96e 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -18,10 +18,8 @@ #include "PrecompiledHeader.h" -#define _EmitterId_ 0 - -#include "ix86.h" -#include "Misc.h" +#include "ix86_internal.h" +#include "System.h" #include "Threading.h" #include "RedtapeWindows.h" @@ -400,6 +398,7 @@ void cpudetectInit() cpudetectSSE3(recSSE); HostSys::Munmap( recSSE, 0x1000 ); } + else { Console::Error("Error: Failed to allocate memory for SSE3 State detection."); } ////////////////////////////////////// // Core Counting! diff --git a/pcsx2/x86/ix86/ix86_fpu.cpp b/pcsx2/x86/ix86/ix86_fpu.cpp new file mode 100644 index 0000000000..d7e3a65963 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_fpu.cpp @@ -0,0 +1,276 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" +#include "ix86_internal.h" + +//------------------------------------------------------------------ +// FPU instructions +//------------------------------------------------------------------ + +/* fild m32 to fpu reg stack */ +emitterT void FILD32( u32 from ) +{ + write8( 0xDB ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fistp m32 from fpu reg stack */ +emitterT void FISTP32( u32 from ) +{ + write8( 0xDB ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fld m32 to fpu reg stack */ +emitterT void FLD32( u32 from ) +{ + write8( 0xD9 ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +// fld st(i) +emitterT void FLD(int st) { write16(0xc0d9+(st<<8)); } +emitterT void FLD1() { write16(0xe8d9); } +emitterT void FLDL2E() { write16(0xead9); } + +/* fst m32 from fpu reg stack */ +emitterT void FST32( u32 to ) +{ + write8( 0xD9 ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(to, 4) ); +} + +/* fstp m32 from fpu reg stack */ +emitterT void FSTP32( u32 to ) +{ + write8( 0xD9 ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(to, 4) ); +} + +// fstp st(i) +emitterT void FSTP(int st) { write16(0xd8dd+(st<<8)); } + +/* fldcw fpu control word from m16 */ +emitterT void FLDCW( u32 from ) +{ + write8( 0xD9 ); + ModRM( 0, 0x5, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fnstcw fpu control word to m16 */ +emitterT void FNSTCW( u32 to ) +{ + write8( 0xD9 ); + ModRM( 0, 0x7, DISP32 ); + write32( MEMADDR(to, 4) ); +} + +emitterT void FNSTSWtoAX() { write16(0xE0DF); } +emitterT void FXAM() { write16(0xe5d9); } +emitterT void FDECSTP() { write16(0xf6d9); } +emitterT void FRNDINT() { write16(0xfcd9); } +emitterT 
void FXCH(int st) { write16(0xc8d9+(st<<8)); } +emitterT void F2XM1() { write16(0xf0d9); } +emitterT void FSCALE() { write16(0xfdd9); } +emitterT void FPATAN(void) { write16(0xf3d9); } +emitterT void FSIN(void) { write16(0xfed9); } + +/* fadd ST(src) to fpu reg stack ST(0) */ +emitterT void FADD32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xC0 + src ); +} + +/* fadd ST(0) to fpu reg stack ST(src) */ +emitterT void FADD320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xC0 + src ); +} + +/* fsub ST(src) to fpu reg stack ST(0) */ +emitterT void FSUB32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xE0 + src ); +} + +/* fsub ST(0) to fpu reg stack ST(src) */ +emitterT void FSUB320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xE8 + src ); +} + +/* fsubp -> substract ST(0) from ST(1), store in ST(1) and POP stack */ +emitterT void FSUBP( void ) +{ + write8( 0xDE ); + write8( 0xE9 ); +} + +/* fmul ST(src) to fpu reg stack ST(0) */ +emitterT void FMUL32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xC8 + src ); +} + +/* fmul ST(0) to fpu reg stack ST(src) */ +emitterT void FMUL320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xC8 + src ); +} + +/* fdiv ST(src) to fpu reg stack ST(0) */ +emitterT void FDIV32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xF0 + src ); +} + +/* fdiv ST(0) to fpu reg stack ST(src) */ +emitterT void FDIV320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xF8 + src ); +} + +emitterT void FDIV320toRP( x86IntRegType src ) +{ + write8( 0xDE ); + write8( 0xF8 + src ); +} + +/* fadd m32 to fpu reg stack */ +emitterT void FADD32( u32 from ) +{ + write8( 0xD8 ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fsub m32 to fpu reg stack */ +emitterT void FSUB32( u32 from ) +{ + write8( 0xD8 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fmul m32 to fpu reg stack */ +emitterT void FMUL32( u32 from ) +{ + write8( 0xD8 ); + ModRM( 0, 
0x1, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fdiv m32 to fpu reg stack */ +emitterT void FDIV32( u32 from ) +{ + write8( 0xD8 ); + ModRM( 0, 0x6, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fabs fpu reg stack */ +emitterT void FABS( void ) +{ + write16( 0xE1D9 ); +} + +/* fsqrt fpu reg stack */ +emitterT void FSQRT( void ) +{ + write16( 0xFAD9 ); +} + +/* fchs fpu reg stack */ +emitterT void FCHS( void ) +{ + write16( 0xE0D9 ); +} + +/* fcomi st, st(i) */ +emitterT void FCOMI( x86IntRegType src ) +{ + write8( 0xDB ); + write8( 0xF0 + src ); +} + +/* fcomip st, st(i) */ +emitterT void FCOMIP( x86IntRegType src ) +{ + write8( 0xDF ); + write8( 0xF0 + src ); +} + +/* fucomi st, st(i) */ +emitterT void FUCOMI( x86IntRegType src ) +{ + write8( 0xDB ); + write8( 0xE8 + src ); +} + +/* fucomip st, st(i) */ +emitterT void FUCOMIP( x86IntRegType src ) +{ + write8( 0xDF ); + write8( 0xE8 + src ); +} + +/* fcom m32 to fpu reg stack */ +emitterT void FCOM32( u32 from ) +{ + write8( 0xD8 ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* fcomp m32 to fpu reg stack */ +emitterT void FCOMP32( u32 from ) +{ + write8( 0xD8 ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +#define FCMOV32( low, high ) \ + { \ + write8( low ); \ + write8( high + from ); \ + } + +emitterT void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } +emitterT void FCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } +emitterT void FCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } +emitterT void FCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } +emitterT void FCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } +emitterT void FCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); } +emitterT void FCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } +emitterT void FCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } diff --git a/pcsx2/x86/ix86/ix86_fpu.inl b/pcsx2/x86/ix86/ix86_fpu.inl deleted file mode 
100644 index ae20b6c4ec..0000000000 --- a/pcsx2/x86/ix86/ix86_fpu.inl +++ /dev/null @@ -1,276 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once -//#include "PrecompiledHeader.h" - -//------------------------------------------------------------------ -// FPU instructions -//------------------------------------------------------------------ - -/* fild m32 to fpu reg stack */ -emitterT void eFILD32( u32 from ) -{ - write8( 0xDB ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fistp m32 from fpu reg stack */ -emitterT void eFISTP32( u32 from ) -{ - write8( 0xDB ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fld m32 to fpu reg stack */ -emitterT void eFLD32( u32 from ) -{ - write8( 0xD9 ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// fld st(i) -emitterT void eFLD(int st) { write16(0xc0d9+(st<<8)); } -emitterT void eFLD1() { write16(0xe8d9); } -emitterT void eFLDL2E() { write16(0xead9); } - -/* fst m32 from fpu reg stack */ -emitterT void eFST32( u32 to ) -{ - write8( 0xD9 ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* fstp m32 from fpu reg stack */ -emitterT void eFSTP32( u32 to ) -{ - write8( 0xD9 ); - ModRM( 0, 0x3, 
DISP32 ); - write32( MEMADDR(to, 4) ); -} - -// fstp st(i) -emitterT void eFSTP(int st) { write16(0xd8dd+(st<<8)); } - -/* fldcw fpu control word from m16 */ -emitterT void eFLDCW( u32 from ) -{ - write8( 0xD9 ); - ModRM( 0, 0x5, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fnstcw fpu control word to m16 */ -emitterT void eFNSTCW( u32 to ) -{ - write8( 0xD9 ); - ModRM( 0, 0x7, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -emitterT void eFNSTSWtoAX() { write16(0xE0DF); } -emitterT void eFXAM() { write16(0xe5d9); } -emitterT void eFDECSTP() { write16(0xf6d9); } -emitterT void eFRNDINT() { write16(0xfcd9); } -emitterT void eFXCH(int st) { write16(0xc8d9+(st<<8)); } -emitterT void eF2XM1() { write16(0xf0d9); } -emitterT void eFSCALE() { write16(0xfdd9); } -emitterT void eFPATAN(void) { write16(0xf3d9); } -emitterT void eFSIN(void) { write16(0xfed9); } - -/* fadd ST(src) to fpu reg stack ST(0) */ -emitterT void eFADD32Rto0( x86IntRegType src ) -{ - write8( 0xD8 ); - write8( 0xC0 + src ); -} - -/* fadd ST(0) to fpu reg stack ST(src) */ -emitterT void eFADD320toR( x86IntRegType src ) -{ - write8( 0xDC ); - write8( 0xC0 + src ); -} - -/* fsub ST(src) to fpu reg stack ST(0) */ -emitterT void eFSUB32Rto0( x86IntRegType src ) -{ - write8( 0xD8 ); - write8( 0xE0 + src ); -} - -/* fsub ST(0) to fpu reg stack ST(src) */ -emitterT void eFSUB320toR( x86IntRegType src ) -{ - write8( 0xDC ); - write8( 0xE8 + src ); -} - -/* fsubp -> substract ST(0) from ST(1), store in ST(1) and POP stack */ -emitterT void eFSUBP( void ) -{ - write8( 0xDE ); - write8( 0xE9 ); -} - -/* fmul ST(src) to fpu reg stack ST(0) */ -emitterT void eFMUL32Rto0( x86IntRegType src ) -{ - write8( 0xD8 ); - write8( 0xC8 + src ); -} - -/* fmul ST(0) to fpu reg stack ST(src) */ -emitterT void eFMUL320toR( x86IntRegType src ) -{ - write8( 0xDC ); - write8( 0xC8 + src ); -} - -/* fdiv ST(src) to fpu reg stack ST(0) */ -emitterT void eFDIV32Rto0( x86IntRegType src ) -{ - write8( 0xD8 ); - write8( 0xF0 + src ); -} - 
-/* fdiv ST(0) to fpu reg stack ST(src) */ -emitterT void eFDIV320toR( x86IntRegType src ) -{ - write8( 0xDC ); - write8( 0xF8 + src ); -} - -emitterT void eFDIV320toRP( x86IntRegType src ) -{ - write8( 0xDE ); - write8( 0xF8 + src ); -} - -/* fadd m32 to fpu reg stack */ -emitterT void eFADD32( u32 from ) -{ - write8( 0xD8 ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fsub m32 to fpu reg stack */ -emitterT void eFSUB32( u32 from ) -{ - write8( 0xD8 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fmul m32 to fpu reg stack */ -emitterT void eFMUL32( u32 from ) -{ - write8( 0xD8 ); - ModRM( 0, 0x1, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fdiv m32 to fpu reg stack */ -emitterT void eFDIV32( u32 from ) -{ - write8( 0xD8 ); - ModRM( 0, 0x6, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fabs fpu reg stack */ -emitterT void eFABS( void ) -{ - write16( 0xE1D9 ); -} - -/* fsqrt fpu reg stack */ -emitterT void eFSQRT( void ) -{ - write16( 0xFAD9 ); -} - -/* fchs fpu reg stack */ -emitterT void eFCHS( void ) -{ - write16( 0xE0D9 ); -} - -/* fcomi st, st(i) */ -emitterT void eFCOMI( x86IntRegType src ) -{ - write8( 0xDB ); - write8( 0xF0 + src ); -} - -/* fcomip st, st(i) */ -emitterT void eFCOMIP( x86IntRegType src ) -{ - write8( 0xDF ); - write8( 0xF0 + src ); -} - -/* fucomi st, st(i) */ -emitterT void eFUCOMI( x86IntRegType src ) -{ - write8( 0xDB ); - write8( 0xE8 + src ); -} - -/* fucomip st, st(i) */ -emitterT void eFUCOMIP( x86IntRegType src ) -{ - write8( 0xDF ); - write8( 0xE8 + src ); -} - -/* fcom m32 to fpu reg stack */ -emitterT void eFCOM32( u32 from ) -{ - write8( 0xD8 ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* fcomp m32 to fpu reg stack */ -emitterT void eFCOMP32( u32 from ) -{ - write8( 0xD8 ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -#define FCMOV32( low, high ) \ - { \ - write8( low ); \ - write8( high + from ); \ - } - -emitterT void eFCMOVB32( 
x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } -emitterT void eFCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } -emitterT void eFCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } -emitterT void eFCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } -emitterT void eFCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } -emitterT void eFCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); } -emitterT void eFCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } -emitterT void eFCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp new file mode 100644 index 0000000000..f76950c0ef --- /dev/null +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -0,0 +1,225 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" +#include "ix86_internal.h" + +//------------------------------------------------------------------ +// x86 Group 1 Instructions +//------------------------------------------------------------------ +// Group 1 instructions all adhere to the same encoding scheme, and so they all +// share the same emitter which has been coded here. 
+// +// Group 1 Table: [column value is the Reg field of the ModRM byte] +// +// 0 1 2 3 4 5 6 7 +// ADD OR ADC SBB AND SUB XOR CMP +// + +namespace x86Emitter { + +////////////////////////////////////////////////////////////////////////////////////////// +// x86RegConverter - this class is used internally by the emitter as a helper for +// converting 8 and 16 register forms into 32 bit forms. This way the end-user exposed API +// can use type-safe 8/16/32 bit register types, and the underlying code can use a single +// unified emitter to generate all function variations + prefixes and such. :) +// +class x86RegConverter : public x86Register32 +{ +public: + x86RegConverter( x86Register32 src ) : x86Register32( src ) {} + x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {} + x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {} +}; + +enum Group1InstructionType +{ + G1Type_ADD=0, + G1Type_OR, + G1Type_ADC, + G1Type_SBB, + G1Type_AND, + G1Type_SUB, + G1Type_XOR, + G1Type_CMP +}; + + +static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false ) +{ + write8( (bit8form ? 0 : 1) | (inst<<3) ); + ModRM( 3, from.Id, to.Id ); +} + +static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false ) +{ + write8( (bit8form ? 0 : 1) | (inst<<3) ); + EmitSibMagic( from, sibdest ); +} + +static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false ) +{ + write8( (bit8form ? 2 : 3) | (inst<<3) ); + EmitSibMagic( to, sibsrc ); +} + +// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit +// instruction (AX,BX,etc). 
+template< typename T > +static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm ) +{ + bool bit8form = (sizeof(T) == 1); + + if( !bit8form && is_s8( imm ) ) + { + write8( 0x83 ); + ModRM( 3, inst, to.Id ); + write8( (s8)imm ); + } + else + { + if( to == eax ) + write8( (bit8form ? 4 : 5) | (inst<<3) ); + else + { + write8( bit8form ? 0x80 : 0x81 ); + ModRM( 3, inst, to.Id ); + } + x86write( imm ); + } +} + +// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit +// instruction (AX,BX,etc). +template< typename T > +static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) +{ + bool bit8form = (sizeof(T) == 1); + + write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) ); + + EmitSibMagic( inst, sibdest ); + + if( !bit8form && is_s8( imm ) ) + write8( (s8)imm ); + else + x86write( imm ); +} + +// 16 bit instruction prefix! +static __forceinline void prefix16() { write8(0x66); } + +////////////////////////////////////////////////////////////////////////////////////////// +// +#define DEFINE_GROUP1_OPCODE( cod ) \ + emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + \ + emitterT void cod##16( x86Register16 to, x86Register16 from ) 
{ prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ + \ + emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \ + emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \ + emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } + +DEFINE_GROUP1_OPCODE( ADD ) +DEFINE_GROUP1_OPCODE( CMP ) +DEFINE_GROUP1_OPCODE( OR ) +DEFINE_GROUP1_OPCODE( ADC ) +DEFINE_GROUP1_OPCODE( SBB ) +DEFINE_GROUP1_OPCODE( AND ) +DEFINE_GROUP1_OPCODE( SUB ) +DEFINE_GROUP1_OPCODE( XOR ) + +} // end namespace x86Emitter + + +static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src ) +{ 
+ return x86Emitter::x86Register32( src ); +} + +static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) +{ + return x86Emitter::x86Register16( src ); +} + +static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src ) +{ + return x86Emitter::x86Register8( src ); +} + +static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src ) +{ + return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// +#define DEFINE_LEGACY_HELPER( cod, bits ) \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); } + +#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \ + DEFINE_LEGACY_HELPER( cod, 32 ) \ + DEFINE_LEGACY_HELPER( cod, 16 ) \ + DEFINE_LEGACY_HELPER( cod, 8 ) + +DEFINE_GROUP1_OPCODE_LEGACY( ADD ) +DEFINE_GROUP1_OPCODE_LEGACY( CMP ) +DEFINE_GROUP1_OPCODE_LEGACY( OR ) +DEFINE_GROUP1_OPCODE_LEGACY( ADC ) +DEFINE_GROUP1_OPCODE_LEGACY( SBB ) 
+DEFINE_GROUP1_OPCODE_LEGACY( AND ) +DEFINE_GROUP1_OPCODE_LEGACY( SUB ) +DEFINE_GROUP1_OPCODE_LEGACY( XOR ) + +// Special forms needed by the legacy emitter syntax: + +emitterT void AND32I8toR( x86IntRegType to, s8 from ) +{ + x86Emitter::AND32( _reghlp32(to), from ); +} + +emitterT void AND32I8toM( uptr to, s8 from ) +{ + x86Emitter::AND32( (void*)to, from ); +} diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h new file mode 100644 index 0000000000..4f3f72f2e4 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -0,0 +1,43 @@ + +#pragma once +#include "ix86.h" + +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ + +#define MEMADDR(addr, oplen) (addr) + +#define Rex(w,r,x,b) assert(0) +#define RexR(w, reg) assert( !(w || (reg)>=8) ) +#define RexB(w, base) assert( !(w || (base)>=8) ) +#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) +#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) + +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) + +static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) +static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) + + +//------------------------------------------------------------------ +// General Emitter Helper functions +//------------------------------------------------------------------ + +namespace x86Emitter +{ + extern void EmitSibMagic( int regfield, const ModSib& info ); + extern void EmitSibMagic( x86Register32 regfield, const ModSib& info ); + extern bool NeedsSibMagic( const ModSib& info ); +} + +// From here out are the legacy (old) emitter functions... 
+ +extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); +extern void ModRM( int mod, int reg, int rm ); +extern void SibSB( int ss, int index, int base ); +extern void SET8R( int cc, int to ); +extern u8* J8Rel( int cc, int to ); +extern u32* J32Rel( int cc, u32 to ); +extern u64 GetCPUTick( void ); +//------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp new file mode 100644 index 0000000000..01a17a729e --- /dev/null +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -0,0 +1,1820 @@ +/* Pcsx2 - Pc Ps2 Emulator +* Copyright (C) 2002-2009 Pcsx2 Team +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ +/* +* ix86 core v0.6.2 +* Authors: linuzappz +* alexey silinov +* goldfinger +* zerofrog(@gmail.com) +* cottonvibes(@gmail.com) +*/ + +//------------------------------------------------------------------ +// ix86 legacy emitter functions +//------------------------------------------------------------------ + +#include "PrecompiledHeader.h" +#include "System.h" +#include "ix86_internal.h" + +using namespace x86Emitter; + +// Note: the 'to' field can either be a register or a special opcode extension specifier +// depending on the opcode's encoding. 
+ +emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) +{ + if ((from&7) == ESP) { + if( offset == 0 ) { + ModRM( 0, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + } + else if( is_s8( offset ) ) { + ModRM( 1, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write8(offset); + } + else { + ModRM( 2, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write32(offset); + } + } + else { + if( offset == 0 ) { + ModRM( 0, to, from ); + } + else if( is_s8( offset ) ) { + ModRM( 1, to, from ); + write8(offset); + } + else { + ModRM( 2, to, from ); + write32(offset); + } + } +} + +emitterT void ModRM( s32 mod, s32 reg, s32 rm ) +{ + write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); +} + +emitterT void SibSB( s32 ss, s32 index, s32 base ) +{ + write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); +} + +emitterT void SET8R( int cc, int to ) +{ + RexB(0, to); + write8( 0x0F ); + write8( cc ); + write8( 0xC0 | ( to ) ); +} + +emitterT u8* J8Rel( int cc, int to ) +{ + write8( cc ); + write8( to ); + return (u8*)(x86Ptr - 1); +} + +emitterT u16* J16Rel( int cc, u32 to ) +{ + write16( 0x0F66 ); + write8( cc ); + write16( to ); + return (u16*)( x86Ptr - 2 ); +} + +emitterT u32* J32Rel( int cc, u32 to ) +{ + write8( 0x0F ); + write8( cc ); + write32( to ); + return (u32*)( x86Ptr - 4 ); +} + +emitterT void CMOV32RtoR( int cc, int to, int from ) +{ + RexRB(0, to, from); + write8( 0x0F ); + write8( cc ); + ModRM( 3, to, from ); +} + +emitterT void CMOV32MtoR( int cc, int to, uptr from ) +{ + RexR(0, to); + write8( 0x0F ); + write8( cc ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +//////////////////////////////////////////////////// +emitterT u8* x86SetPtr( u8* ptr ) +{ + u8* rv=x86Ptr; + if (ptr!=0) + x86Ptr = ptr; + return rv; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// Jump Label API (as rough as it might be) +// +// I don't auto-inline these because of the console logging in case of error, which 
tends +// to cause quite a bit of code bloat. +// +void x86SetJ8( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + *j8 = (u8)jump; +} + +void x86SetJ8A( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + + if( ((uptr)x86Ptr&0xf) > 4 ) { + + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); + + if( newjump <= 0x7f ) { + jump = newjump; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + } + } + *j8 = (u8)jump; +} + +//////////////////////////////////////////////////// +emitterT void x86SetJ32( u32* j32 ) +{ + *j32 = ( x86Ptr - (u8*)j32 ) - 4; +} + +emitterT void x86SetJ32A( u32* j32 ) +{ + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); +} + +//////////////////////////////////////////////////// +emitterT void x86Align( int bytes ) +{ + // forward align + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); +} + +//////////////////////////////////////////////////// +// Generates executable code to align to the given alignment (could be useful for the second leg +// of if/else conditionals, which usually fall through a jump target label). +// +// Note: Left in for now just in case, but usefulness is moot. Only K8's and older (non-Prescott) +// P4s benefit from this, and we don't optimize for those platforms anyway. +// +void x86AlignExecutable( int align ) +{ + uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); + uptr bytes = ( newx86 - (uptr)x86Ptr ); + + switch( bytes ) + { + case 0: break; + + case 1: NOP(); break; + case 2: MOV32RtoR( ESI, ESI ); break; + case 3: write8(0x08D); write8(0x024); write8(0x024); break; + case 5: NOP(); // falls through to 4... + case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; + case 6: write8(0x08D); write8(0x0B6); write32(0); break; + case 8: NOP(); // falls through to 7... 
+ case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; + + default: + { + // for larger alignments, just use a JMP... + u8* aligned_target = JMP8(0); + x86Ptr = (u8*)newx86; + x86SetJ8( aligned_target ); + } + } + + jASSUME( x86Ptr == (u8*)newx86 ); +} + +/********************/ +/* IX86 instructions */ +/********************/ + +emitterT void STC( void ) +{ + write8( 0xF9 ); +} + +emitterT void CLC( void ) +{ + write8( 0xF8 ); +} + +// NOP 1-byte +emitterT void NOP( void ) +{ + write8(0x90); +} + + +//////////////////////////////////// +// mov instructions / +//////////////////////////////////// + +/* mov r32 to r32 */ +emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) +{ + if( to == from ) return; + + RexRB(0, from, to); + write8( 0x89 ); + ModRM( 3, from, to ); +} + +/* mov r32 to m32 */ +emitterT void MOV32RtoM( uptr to, x86IntRegType from ) +{ + RexR(0, from); + if (from == EAX) { + write8(0xA3); + } else { + write8( 0x89 ); + ModRM( 0, from, DISP32 ); + } + write32( MEMADDR(to, 4) ); +} + +/* mov m32 to r32 */ +emitterT void MOV32MtoR( x86IntRegType to, uptr from ) +{ + RexR(0, to); + if (to == EAX) { + write8(0xA1); + } else { + write8( 0x8B ); + ModRM( 0, to, DISP32 ); + } + write32( MEMADDR(from, 4) ); +} + +emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); +} + +/* mov [r32+r32*scale] to r32 */ +emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + RexRXB(0,to,from2,from); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); +} + +// mov r32 to [r32< 0x7f ) { + assert( to <= 0xffffffff ); + return (u8*)JMP32( to ); + } + else { + return (u8*)JMP8( to ); + } +} + +/* jmp rel8 */ +emitterT u8* JMP8( u8 to ) +{ + write8( 0xEB ); + write8( to ); + return x86Ptr - 1; +} + +/* jmp rel32 */ +emitterT u32* JMP32( uptr to ) +{ + assert( (sptr)to <= 0x7fffffff 
&& (sptr)to >= -0x7fffffff ); + write8( 0xE9 ); + write32( to ); + return (u32*)(x86Ptr - 4 ); +} + +/* jmp r32/r64 */ +emitterT void JMPR( x86IntRegType to ) +{ + RexB(0, to); + write8( 0xFF ); + ModRM( 3, 4, to ); +} + +// jmp m32 +emitterT void JMP32M( uptr to ) +{ + write8( 0xFF ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 4)); +} + +/* jp rel8 */ +emitterT u8* JP8( u8 to ) { + return J8Rel( 0x7A, to ); +} + +/* jnp rel8 */ +emitterT u8* JNP8( u8 to ) { + return J8Rel( 0x7B, to ); +} + +/* je rel8 */ +emitterT u8* JE8( u8 to ) { + return J8Rel( 0x74, to ); +} + +/* jz rel8 */ +emitterT u8* JZ8( u8 to ) +{ + return J8Rel( 0x74, to ); +} + +/* js rel8 */ +emitterT u8* JS8( u8 to ) +{ + return J8Rel( 0x78, to ); +} + +/* jns rel8 */ +emitterT u8* JNS8( u8 to ) +{ + return J8Rel( 0x79, to ); +} + +/* jg rel8 */ +emitterT u8* JG8( u8 to ) +{ + return J8Rel( 0x7F, to ); +} + +/* jge rel8 */ +emitterT u8* JGE8( u8 to ) +{ + return J8Rel( 0x7D, to ); +} + +/* jl rel8 */ +emitterT u8* JL8( u8 to ) +{ + return J8Rel( 0x7C, to ); +} + +/* ja rel8 */ +emitterT u8* JA8( u8 to ) +{ + return J8Rel( 0x77, to ); +} + +emitterT u8* JAE8( u8 to ) +{ + return J8Rel( 0x73, to ); +} + +/* jb rel8 */ +emitterT u8* JB8( u8 to ) +{ + return J8Rel( 0x72, to ); +} + +/* jbe rel8 */ +emitterT u8* JBE8( u8 to ) +{ + return J8Rel( 0x76, to ); +} + +/* jle rel8 */ +emitterT u8* JLE8( u8 to ) +{ + return J8Rel( 0x7E, to ); +} + +/* jne rel8 */ +emitterT u8* JNE8( u8 to ) +{ + return J8Rel( 0x75, to ); +} + +/* jnz rel8 */ +emitterT u8* JNZ8( u8 to ) +{ + return J8Rel( 0x75, to ); +} + +/* jng rel8 */ +emitterT u8* JNG8( u8 to ) +{ + return J8Rel( 0x7E, to ); +} + +/* jnge rel8 */ +emitterT u8* JNGE8( u8 to ) +{ + return J8Rel( 0x7C, to ); +} + +/* jnl rel8 */ +emitterT u8* JNL8( u8 to ) +{ + return J8Rel( 0x7D, to ); +} + +/* jnle rel8 */ +emitterT u8* JNLE8( u8 to ) +{ + return J8Rel( 0x7F, to ); +} + +/* jo rel8 */ +emitterT u8* JO8( u8 to ) +{ + return J8Rel( 0x70, to ); +} + +/* jno 
rel8 */ +emitterT u8* JNO8( u8 to ) +{ + return J8Rel( 0x71, to ); +} +// jb rel32 +emitterT u32* JB32( u32 to ) +{ + return J32Rel( 0x82, to ); +} + +/* je rel32 */ +emitterT u32* JE32( u32 to ) +{ + return J32Rel( 0x84, to ); +} + +/* jz rel32 */ +emitterT u32* JZ32( u32 to ) +{ + return J32Rel( 0x84, to ); +} + +/* js rel32 */ +emitterT u32* JS32( u32 to ) +{ + return J32Rel( 0x88, to ); +} + +/* jns rel32 */ +emitterT u32* JNS32( u32 to ) +{ + return J32Rel( 0x89, to ); +} + +/* jg rel32 */ +emitterT u32* JG32( u32 to ) +{ + return J32Rel( 0x8F, to ); +} + +/* jge rel32 */ +emitterT u32* JGE32( u32 to ) +{ + return J32Rel( 0x8D, to ); +} + +/* jl rel32 */ +emitterT u32* JL32( u32 to ) +{ + return J32Rel( 0x8C, to ); +} + +/* jle rel32 */ +emitterT u32* JLE32( u32 to ) +{ + return J32Rel( 0x8E, to ); +} + +/* ja rel32 */ +emitterT u32* JA32( u32 to ) +{ + return J32Rel( 0x87, to ); +} + +/* jae rel32 */ +emitterT u32* JAE32( u32 to ) +{ + return J32Rel( 0x83, to ); +} + +/* jne rel32 */ +emitterT u32* JNE32( u32 to ) +{ + return J32Rel( 0x85, to ); +} + +/* jnz rel32 */ +emitterT u32* JNZ32( u32 to ) +{ + return J32Rel( 0x85, to ); +} + +/* jng rel32 */ +emitterT u32* JNG32( u32 to ) +{ + return J32Rel( 0x8E, to ); +} + +/* jnge rel32 */ +emitterT u32* JNGE32( u32 to ) +{ + return J32Rel( 0x8C, to ); +} + +/* jnl rel32 */ +emitterT u32* JNL32( u32 to ) +{ + return J32Rel( 0x8D, to ); +} + +/* jnle rel32 */ +emitterT u32* JNLE32( u32 to ) +{ + return J32Rel( 0x8F, to ); +} + +/* jo rel32 */ +emitterT u32* JO32( u32 to ) +{ + return J32Rel( 0x80, to ); +} + +/* jno rel32 */ +emitterT u32* JNO32( u32 to ) +{ + return J32Rel( 0x81, to ); +} + + + +/* call func */ +emitterT void CALLFunc( uptr func ) +{ + func -= ( (uptr)x86Ptr + 5 ); + assert( (sptr)func <= 0x7fffffff && (sptr)func >= -0x7fffffff ); + CALL32(func); +} + +/* call rel32 */ +emitterT void CALL32( u32 to ) +{ + write8( 0xE8 ); + write32( to ); +} + +/* call r32 */ +emitterT void CALL32R( x86IntRegType 
to ) +{ + write8( 0xFF ); + ModRM( 3, 2, to ); +} + +/* call m32 */ +emitterT void CALL32M( u32 to ) +{ + write8( 0xFF ); + ModRM( 0, 2, DISP32 ); + write32( MEMADDR(to, 4) ); +} + +//////////////////////////////////// +// misc instructions / +//////////////////////////////////// + +/* test imm32 to r32 */ +emitterT void TEST32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) + { + write8( 0xA9 ); + } + else + { + write8( 0xF7 ); + ModRM( 3, 0, to ); + } + write32( from ); +} + +emitterT void TEST32ItoM( uptr to, u32 from ) +{ + write8( 0xF7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); +} + +/* test r32 to r32 */ +emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x85 ); + ModRM( 3, from, to ); +} + +// test imm32 to [r32] +emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) +{ + RexB(0,to); + write8( 0xF7 ); + ModRM( 0, 0, to ); + write32(from); +} + +// test imm16 to r16 +emitterT void TEST16ItoR( x86IntRegType to, u16 from ) +{ + write8(0x66); + RexB(0,to); + if ( to == EAX ) + { + write8( 0xA9 ); + } + else + { + write8( 0xF7 ); + ModRM( 3, 0, to ); + } + write16( from ); +} + +// test r16 to r16 +emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) +{ + write8(0x66); + RexRB(0,from,to); + write8( 0x85 ); + ModRM( 3, from, to ); +} + +// test r8 to r8 +emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, from, to); + write8( 0x84 ); + ModRM( 3, from, to ); +} + + +// test imm8 to r8 +emitterT void TEST8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( to == EAX ) + { + write8( 0xA8 ); + } + else + { + write8( 0xF6 ); + ModRM( 3, 0, to ); + } + write8( from ); +} + +// test imm8 to r8 +emitterT void TEST8ItoM( uptr to, u8 from ) +{ + write8( 0xF6 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); +} + +/* sets r8 */ +emitterT void SETS8R( x86IntRegType to ) +{ + SET8R( 0x98, to ); +} + +/* 
setl r8 */ +emitterT void SETL8R( x86IntRegType to ) +{ + SET8R( 0x9C, to ); +} + +// setge r8 +emitterT void SETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } +// setg r8 +emitterT void SETG8R( x86IntRegType to ) { SET8R(0x9f, to); } +// seta r8 +emitterT void SETA8R( x86IntRegType to ) { SET8R(0x97, to); } +// setae r8 +emitterT void SETAE8R( x86IntRegType to ) { SET8R(0x99, to); } +/* setb r8 */ +emitterT void SETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } +/* setb r8 */ +emitterT void SETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } +// setz r8 +emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); } +// sete r8 +emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); } + +/* push imm32 */ +emitterT void PUSH32I( u32 from ) { PUSH( from ); } + +/* push r32 */ +emitterT void PUSH32R( x86IntRegType from ) { PUSH( x86Register32( from ) ); } + +/* push m32 */ +emitterT void PUSH32M( u32 from ) +{ + PUSH( ptr[from] ); +} + +/* pop r32 */ +emitterT void POP32R( x86IntRegType from ) { POP( x86Register32( from ) ); } + +/* pushfd */ +emitterT void PUSHFD( void ) { write8( 0x9C ); } +/* popfd */ +emitterT void POPFD( void ) { write8( 0x9D ); } + +emitterT void RET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); } + +emitterT void CBW( void ) { write16( 0x9866 ); } +emitterT void CWD( void ) { write8( 0x98 ); } +emitterT void CDQ( void ) { write8( 0x99 ); } +emitterT void CWDE() { write8(0x98); } + +emitterT void LAHF() { write8(0x9f); } +emitterT void SAHF() { write8(0x9e); } + +/* + Bit manupilation opcodes +*/ +emitterT void BT32ItoR( x86IntRegType to, u8 from ) +{ + write16( 0xBA0F ); + ModRM(3, 4, to); + write8( from ); +} + +emitterT void BTR32ItoR( x86IntRegType to, u8 from ) +{ + write16( 0xBA0F ); + ModRM(3, 6, to); + write8( from ); +} + +emitterT void BSRRtoR(x86IntRegType to, x86IntRegType from) +{ + write16( 0xBD0F ); + ModRM( 3, from, to ); +} + +emitterT void BTS32MtoR( uptr to, x86IntRegType from ) +{ + write8( 0xf ); + write8( 
0xab ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); +} + +emitterT void BSWAP32R( x86IntRegType to ) +{ + write8( 0x0F ); + write8( 0xC8 + to ); +} + +emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) +{ + LEA32( x86Register32( to ), ptr[x86IndexReg(from)+offset] ); +} + +emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +{ + LEA32( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] ); +} + +// Don't inline recursive functions +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +{ + LEA32( x86Register32( to ), ptr[x86IndexReg(from)*(1< -#define MOV64RtoM eMOV64RtoM<_EmitterId_> -#define MOV64MtoR eMOV64MtoR<_EmitterId_> -#define MOV64I32toM eMOV64I32toM<_EmitterId_> -#define MOV64I32toR eMOV64I32toR<_EmitterId_> -#define MOV64ItoR eMOV64ItoR<_EmitterId_> -#define MOV64ItoRmOffset eMOV64ItoRmOffset<_EmitterId_> -#define MOV64RmOffsettoR eMOV64RmOffsettoR<_EmitterId_> -#define MOV64RmStoR eMOV64RmStoR<_EmitterId_> -#define MOV64RtoRmOffset eMOV64RtoRmOffset<_EmitterId_> -#define MOV64RtoRmS eMOV64RtoRmS<_EmitterId_> #define MOV32RtoR eMOV32RtoR<_EmitterId_> #define MOV32RtoM eMOV32RtoM<_EmitterId_> #define MOV32MtoR eMOV32MtoR<_EmitterId_> #define MOV32RmtoR eMOV32RmtoR<_EmitterId_> -#define MOV32RmtoROffset eMOV32RmtoROffset<_EmitterId_> +#define MOV32RmtoR eMOV32RmtoR<_EmitterId_> #define MOV32RmStoR eMOV32RmStoR<_EmitterId_> #define MOV32RmSOffsettoR eMOV32RmSOffsettoR<_EmitterId_> #define MOV32RtoRm eMOV32RtoRm<_EmitterId_> #define MOV32RtoRmS eMOV32RtoRmS<_EmitterId_> #define MOV32ItoR eMOV32ItoR<_EmitterId_> #define MOV32ItoM eMOV32ItoM<_EmitterId_> -#define MOV32ItoRmOffset eMOV32ItoRmOffset<_EmitterId_> -#define MOV32RtoRmOffset eMOV32RtoRmOffset<_EmitterId_> +#define MOV32ItoRm eMOV32ItoRm<_EmitterId_> +#define MOV32RtoRm eMOV32RtoRm<_EmitterId_> #define MOV16RtoM eMOV16RtoM<_EmitterId_> #define MOV16MtoR eMOV16MtoR<_EmitterId_> #define MOV16RmtoR 
eMOV16RmtoR<_EmitterId_> -#define MOV16RmtoROffset eMOV16RmtoROffset<_EmitterId_> +#define MOV16RmtoR eMOV16RmtoR<_EmitterId_> #define MOV16RmSOffsettoR eMOV16RmSOffsettoR<_EmitterId_> #define MOV16RtoRm eMOV16RtoRm<_EmitterId_> #define MOV16ItoM eMOV16ItoM<_EmitterId_> #define MOV16RtoRmS eMOV16RtoRmS<_EmitterId_> #define MOV16ItoR eMOV16ItoR<_EmitterId_> -#define MOV16ItoRmOffset eMOV16ItoRmOffset<_EmitterId_> -#define MOV16RtoRmOffset eMOV16RtoRmOffset<_EmitterId_> +#define MOV16ItoRm eMOV16ItoRm<_EmitterId_> +#define MOV16RtoRm eMOV16RtoRm<_EmitterId_> #define MOV8RtoM eMOV8RtoM<_EmitterId_> #define MOV8MtoR eMOV8MtoR<_EmitterId_> #define MOV8RmtoR eMOV8RmtoR<_EmitterId_> -#define MOV8RmtoROffset eMOV8RmtoROffset<_EmitterId_> +#define MOV8RmtoR eMOV8RmtoR<_EmitterId_> #define MOV8RmSOffsettoR eMOV8RmSOffsettoR<_EmitterId_> #define MOV8RtoRm eMOV8RtoRm<_EmitterId_> #define MOV8ItoM eMOV8ItoM<_EmitterId_> #define MOV8ItoR eMOV8ItoR<_EmitterId_> -#define MOV8ItoRmOffset eMOV8ItoRmOffset<_EmitterId_> -#define MOV8RtoRmOffset eMOV8RtoRmOffset<_EmitterId_> +#define MOV8ItoRm eMOV8ItoRm<_EmitterId_> +#define MOV8RtoRm eMOV8RtoRm<_EmitterId_> #define MOVSX32R8toR eMOVSX32R8toR<_EmitterId_> #define MOVSX32Rm8toR eMOVSX32Rm8toR<_EmitterId_> -#define MOVSX32Rm8toROffset eMOVSX32Rm8toROffset<_EmitterId_> #define MOVSX32M8toR eMOVSX32M8toR<_EmitterId_> #define MOVSX32R16toR eMOVSX32R16toR<_EmitterId_> #define MOVSX32Rm16toR eMOVSX32Rm16toR<_EmitterId_> -#define MOVSX32Rm16toROffset eMOVSX32Rm16toROffset<_EmitterId_> #define MOVSX32M16toR eMOVSX32M16toR<_EmitterId_> #define MOVZX32R8toR eMOVZX32R8toR<_EmitterId_> #define MOVZX32Rm8toR eMOVZX32Rm8toR<_EmitterId_> -#define MOVZX32Rm8toROffset eMOVZX32Rm8toROffset<_EmitterId_> #define MOVZX32M8toR eMOVZX32M8toR<_EmitterId_> #define MOVZX32R16toR eMOVZX32R16toR<_EmitterId_> #define MOVZX32Rm16toR eMOVZX32Rm16toR<_EmitterId_> -#define MOVZX32Rm16toROffset eMOVZX32Rm16toROffset<_EmitterId_> #define MOVZX32M16toR 
eMOVZX32M16toR<_EmitterId_> #define CMOVBE32RtoR eCMOVBE32RtoR<_EmitterId_> #define CMOVBE32MtoR eCMOVBE32MtoR<_EmitterId_> @@ -147,12 +132,10 @@ //------------------------------------------------------------------ // arithmetic instructions //------------------------------------------------------------------ -#define ADD64ItoR eADD64ItoR<_EmitterId_> -#define ADD64MtoR eADD64MtoR<_EmitterId_> #define ADD32ItoEAX eADD32ItoEAX<_EmitterId_> #define ADD32ItoR eADD32ItoR<_EmitterId_> #define ADD32ItoM eADD32ItoM<_EmitterId_> -#define ADD32ItoRmOffset eADD32ItoRmOffset<_EmitterId_> +#define ADD32ItoRm eADD32ItoRm<_EmitterId_> #define ADD32RtoR eADD32RtoR<_EmitterId_> #define ADD32RtoM eADD32RtoM<_EmitterId_> #define ADD32MtoR eADD32MtoR<_EmitterId_> @@ -171,7 +154,6 @@ #define INC32M eINC32M<_EmitterId_> #define INC16R eINC16R<_EmitterId_> #define INC16M eINC16M<_EmitterId_> -#define SUB64MtoR eSUB64MtoR<_EmitterId_> #define SUB32ItoR eSUB32ItoR<_EmitterId_> #define SUB32ItoM eSUB32ItoM<_EmitterId_> #define SUB32RtoR eSUB32RtoR<_EmitterId_> @@ -181,7 +163,6 @@ #define SUB16ItoR eSUB16ItoR<_EmitterId_> #define SUB16ItoM eSUB16ItoM<_EmitterId_> #define SUB16MtoR eSUB16MtoR<_EmitterId_> -#define SBB64RtoR eSBB64RtoR<_EmitterId_> #define SBB32ItoR eSBB32ItoR<_EmitterId_> #define SBB32ItoM eSBB32ItoM<_EmitterId_> #define SBB32RtoR eSBB32RtoR<_EmitterId_> @@ -203,12 +184,6 @@ //------------------------------------------------------------------ // shifting instructions //------------------------------------------------------------------ -#define SHL64ItoR eSHL64ItoR<_EmitterId_> -#define SHL64CLtoR eSHL64CLtoR<_EmitterId_> -#define SHR64ItoR eSHR64ItoR<_EmitterId_> -#define SHR64CLtoR eSHR64CLtoR<_EmitterId_> -#define SAR64ItoR eSAR64ItoR<_EmitterId_> -#define SAR64CLtoR eSAR64CLtoR<_EmitterId_> #define SHL32ItoR eSHL32ItoR<_EmitterId_> #define SHL32ItoM eSHL32ItoM<_EmitterId_> #define SHL32CLtoR eSHL32CLtoR<_EmitterId_> @@ -231,10 +206,6 @@ 
//------------------------------------------------------------------ // logical instructions //------------------------------------------------------------------ -#define OR64ItoR eOR64ItoR<_EmitterId_> -#define OR64MtoR eOR64MtoR<_EmitterId_> -#define OR64RtoR eOR64RtoR<_EmitterId_> -#define OR64RtoM eOR64RtoM<_EmitterId_> #define OR32ItoR eOR32ItoR<_EmitterId_> #define OR32ItoM eOR32ItoM<_EmitterId_> #define OR32RtoR eOR32RtoR<_EmitterId_> @@ -249,11 +220,6 @@ #define OR8RtoM eOR8RtoM<_EmitterId_> #define OR8ItoM eOR8ItoM<_EmitterId_> #define OR8MtoR eOR8MtoR<_EmitterId_> -#define XOR64ItoR eXOR64ItoR<_EmitterId_> -#define XOR64RtoR eXOR64RtoR<_EmitterId_> -#define XOR64MtoR eXOR64MtoR<_EmitterId_> -#define XOR64RtoR eXOR64RtoR<_EmitterId_> -#define XOR64RtoM eXOR64RtoM<_EmitterId_> #define XOR32ItoR eXOR32ItoR<_EmitterId_> #define XOR32ItoM eXOR32ItoM<_EmitterId_> #define XOR32RtoR eXOR32RtoR<_EmitterId_> @@ -262,11 +228,6 @@ #define XOR32MtoR eXOR32MtoR<_EmitterId_> #define XOR16RtoM eXOR16RtoM<_EmitterId_> #define XOR16ItoR eXOR16ItoR<_EmitterId_> -#define AND64I32toR eAND64I32toR<_EmitterId_> -#define AND64MtoR eAND64MtoR<_EmitterId_> -#define AND64RtoM eAND64RtoM<_EmitterId_> -#define AND64RtoR eAND64RtoR<_EmitterId_> -#define AND64I32toM eAND64I32toM<_EmitterId_> #define AND32ItoR eAND32ItoR<_EmitterId_> #define AND32I8toR eAND32I8toR<_EmitterId_> #define AND32ItoM eAND32ItoM<_EmitterId_> @@ -275,7 +236,7 @@ #define AND32RtoM eAND32RtoM<_EmitterId_> #define AND32MtoR eAND32MtoR<_EmitterId_> #define AND32RmtoR eAND32RmtoR<_EmitterId_> -#define AND32RmtoROffset eAND32RmtoROffset<_EmitterId_> +#define AND32RmtoR eAND32RmtoR<_EmitterId_> #define AND16RtoR eAND16RtoR<_EmitterId_> #define AND16ItoR eAND16ItoR<_EmitterId_> #define AND16ItoM eAND16ItoM<_EmitterId_> @@ -286,11 +247,8 @@ #define AND8RtoM eAND8RtoM<_EmitterId_> #define AND8MtoR eAND8MtoR<_EmitterId_> #define AND8RtoR eAND8RtoR<_EmitterId_> -#define BTS32MtoR eBTS32MtoR<_EmitterId_> -#define NOT64R 
eNOT64R<_EmitterId_> #define NOT32R eNOT32R<_EmitterId_> #define NOT32M eNOT32M<_EmitterId_> -#define NEG64R eNEG64R<_EmitterId_> #define NEG32R eNEG32R<_EmitterId_> #define NEG32M eNEG32M<_EmitterId_> #define NEG16R eNEG16R<_EmitterId_> @@ -350,15 +308,13 @@ //------------------------------------------------------------------ // misc instructions //------------------------------------------------------------------ -#define CMP64I32toR eCMP64I32toR<_EmitterId_> -#define CMP64MtoR eCMP64MtoR<_EmitterId_> -#define CMP64RtoR eCMP64RtoR<_EmitterId_> #define CMP32ItoR eCMP32ItoR<_EmitterId_> #define CMP32ItoM eCMP32ItoM<_EmitterId_> #define CMP32RtoR eCMP32RtoR<_EmitterId_> #define CMP32MtoR eCMP32MtoR<_EmitterId_> +#define CMP32ItoRm eCMP32ItoRm<_EmitterId_> +#define CMP8I8toRm eCMP8I8toRm<_EmitterId_> #define CMP32I8toRm eCMP32I8toRm<_EmitterId_> -#define CMP32I8toRmOffset8 eCMP32I8toRmOffset8<_EmitterId_> #define CMP32I8toM eCMP32I8toM<_EmitterId_> #define CMP16ItoR eCMP16ItoR<_EmitterId_> #define CMP16ItoM eCMP16ItoM<_EmitterId_> @@ -540,16 +496,16 @@ #define PUNPCKHDQMtoR ePUNPCKHDQMtoR<_EmitterId_> #define MOVQ64ItoR eMOVQ64ItoR<_EmitterId_> #define MOVQRtoR eMOVQRtoR<_EmitterId_> -#define MOVQRmtoROffset eMOVQRmtoROffset<_EmitterId_> -#define MOVQRtoRmOffset eMOVQRtoRmOffset<_EmitterId_> +#define MOVQRmtoR eMOVQRmtoR<_EmitterId_> +#define MOVQRtoRm eMOVQRtoRm<_EmitterId_> #define MOVDMtoMMX eMOVDMtoMMX<_EmitterId_> #define MOVDMMXtoM eMOVDMMXtoM<_EmitterId_> #define MOVD32RtoMMX eMOVD32RtoMMX<_EmitterId_> #define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_> -#define MOVD32RmOffsettoMMX eMOVD32RmOffsettoMMX<_EmitterId_> +#define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_> #define MOVD32MMXtoR eMOVD32MMXtoR<_EmitterId_> #define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_> -#define MOVD32MMXtoRmOffset eMOVD32MMXtoRmOffset<_EmitterId_> +#define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_> #define PINSRWRtoMMX ePINSRWRtoMMX<_EmitterId_> #define PSHUFWRtoR 
ePSHUFWRtoR<_EmitterId_> #define PSHUFWMtoR ePSHUFWMtoR<_EmitterId_> @@ -576,33 +532,31 @@ #define SSE_MOVSS_XMM_to_M32 eSSE_MOVSS_XMM_to_M32<_EmitterId_> #define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_> #define SSE_MOVSS_XMM_to_XMM eSSE_MOVSS_XMM_to_XMM<_EmitterId_> -#define SSE_MOVSS_RmOffset_to_XMM eSSE_MOVSS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVSS_XMM_to_RmOffset eSSE_MOVSS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVSS_Rm_to_XMM eSSE_MOVSS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_> #define SSE_MASKMOVDQU_XMM_to_XMM eSSE_MASKMOVDQU_XMM_to_XMM<_EmitterId_> #define SSE_MOVLPS_M64_to_XMM eSSE_MOVLPS_M64_to_XMM<_EmitterId_> #define SSE_MOVLPS_XMM_to_M64 eSSE_MOVLPS_XMM_to_M64<_EmitterId_> -#define SSE_MOVLPS_RmOffset_to_XMM eSSE_MOVLPS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVLPS_XMM_to_RmOffset eSSE_MOVLPS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVLPS_Rm_to_XMM eSSE_MOVLPS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVLPS_XMM_to_Rm eSSE_MOVLPS_XMM_to_Rm<_EmitterId_> #define SSE_MOVHPS_M64_to_XMM eSSE_MOVHPS_M64_to_XMM<_EmitterId_> #define SSE_MOVHPS_XMM_to_M64 eSSE_MOVHPS_XMM_to_M64<_EmitterId_> -#define SSE_MOVHPS_RmOffset_to_XMM eSSE_MOVHPS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVHPS_XMM_to_RmOffset eSSE_MOVHPS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVHPS_Rm_to_XMM eSSE_MOVHPS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVHPS_XMM_to_Rm eSSE_MOVHPS_XMM_to_Rm<_EmitterId_> #define SSE_MOVLHPS_XMM_to_XMM eSSE_MOVLHPS_XMM_to_XMM<_EmitterId_> #define SSE_MOVHLPS_XMM_to_XMM eSSE_MOVHLPS_XMM_to_XMM<_EmitterId_> #define SSE_MOVLPSRmtoR eSSE_MOVLPSRmtoR<_EmitterId_> -#define SSE_MOVLPSRmtoROffset eSSE_MOVLPSRmtoROffset<_EmitterId_> #define SSE_MOVLPSRtoRm eSSE_MOVLPSRtoRm<_EmitterId_> -#define SSE_MOVLPSRtoRmOffset eSSE_MOVLPSRtoRmOffset<_EmitterId_> #define SSE_MOVAPSRmStoR eSSE_MOVAPSRmStoR<_EmitterId_> #define SSE_MOVAPSRtoRmS eSSE_MOVAPSRtoRmS<_EmitterId_> -#define SSE_MOVAPSRtoRmOffset 
eSSE_MOVAPSRtoRmOffset<_EmitterId_> -#define SSE_MOVAPSRmtoROffset eSSE_MOVAPSRmtoROffset<_EmitterId_> +#define SSE_MOVAPSRtoRm eSSE_MOVAPSRtoRm<_EmitterId_> +#define SSE_MOVAPSRmtoR eSSE_MOVAPSRmtoR<_EmitterId_> #define SSE_MOVUPSRmStoR eSSE_MOVUPSRmStoR<_EmitterId_> #define SSE_MOVUPSRtoRmS eSSE_MOVUPSRtoRmS<_EmitterId_> #define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_> #define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_> -#define SSE_MOVUPSRmtoROffset eSSE_MOVUPSRmtoROffset<_EmitterId_> -#define SSE_MOVUPSRtoRmOffset eSSE_MOVUPSRtoRmOffset<_EmitterId_> +#define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_> +#define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_> #define SSE_RCPPS_XMM_to_XMM eSSE_RCPPS_XMM_to_XMM<_EmitterId_> #define SSE_RCPPS_M128_to_XMM eSSE_RCPPS_M128_to_XMM<_EmitterId_> #define SSE_RCPSS_XMM_to_XMM eSSE_RCPSS_XMM_to_XMM<_EmitterId_> @@ -677,7 +631,7 @@ #define SSE_UNPCKHPS_XMM_to_XMM eSSE_UNPCKHPS_XMM_to_XMM<_EmitterId_> #define SSE_SHUFPS_XMM_to_XMM eSSE_SHUFPS_XMM_to_XMM<_EmitterId_> #define SSE_SHUFPS_M128_to_XMM eSSE_SHUFPS_M128_to_XMM<_EmitterId_> -#define SSE_SHUFPS_RmOffset_to_XMM eSSE_SHUFPS_RmOffset_to_XMM<_EmitterId_> +#define SSE_SHUFPS_Rm_to_XMM eSSE_SHUFPS_Rm_to_XMM<_EmitterId_> #define SSE_CMPEQPS_M128_to_XMM eSSE_CMPEQPS_M128_to_XMM<_EmitterId_> #define SSE_CMPEQPS_XMM_to_XMM eSSE_CMPEQPS_XMM_to_XMM<_EmitterId_> #define SSE_CMPLTPS_M128_to_XMM eSSE_CMPLTPS_M128_to_XMM<_EmitterId_> @@ -781,8 +735,8 @@ #define SSE2_MOVQ_XMM_to_M64 eSSE2_MOVQ_XMM_to_M64<_EmitterId_> #define SSE2_MOVDQ2Q_XMM_to_MM eSSE2_MOVDQ2Q_XMM_to_MM<_EmitterId_> #define SSE2_MOVQ2DQ_MM_to_XMM eSSE2_MOVQ2DQ_MM_to_XMM<_EmitterId_> -#define SSE2_MOVDQARtoRmOffset eSSE2_MOVDQARtoRmOffset<_EmitterId_> -#define SSE2_MOVDQARmtoROffset eSSE2_MOVDQARmtoROffset<_EmitterId_> +#define SSE2_MOVDQARtoRm eSSE2_MOVDQARtoRm<_EmitterId_> +#define SSE2_MOVDQARmtoR eSSE2_MOVDQARmtoR<_EmitterId_> #define SSE2_CVTDQ2PS_M128_to_XMM eSSE2_CVTDQ2PS_M128_to_XMM<_EmitterId_> #define 
SSE2_CVTDQ2PS_XMM_to_XMM eSSE2_CVTDQ2PS_XMM_to_XMM<_EmitterId_> #define SSE2_CVTPS2DQ_M128_to_XMM eSSE2_CVTPS2DQ_M128_to_XMM<_EmitterId_> @@ -921,11 +875,11 @@ #define SSE2_MOVD_M32_to_XMM eSSE2_MOVD_M32_to_XMM<_EmitterId_> #define SSE2_MOVD_R_to_XMM eSSE2_MOVD_R_to_XMM<_EmitterId_> #define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_> -#define SSE2_MOVD_RmOffset_to_XMM eSSE2_MOVD_RmOffset_to_XMM<_EmitterId_> +#define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_> #define SSE2_MOVD_XMM_to_M32 eSSE2_MOVD_XMM_to_M32<_EmitterId_> #define SSE2_MOVD_XMM_to_R eSSE2_MOVD_XMM_to_R<_EmitterId_> #define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_> -#define SSE2_MOVD_XMM_to_RmOffset eSSE2_MOVD_XMM_to_RmOffset<_EmitterId_> +#define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_> #define SSE2_MOVQ_XMM_to_R eSSE2_MOVQ_XMM_to_R<_EmitterId_> #define SSE2_MOVQ_R_to_XMM eSSE2_MOVQ_R_to_XMM<_EmitterId_> //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86_mmx.cpp b/pcsx2/x86/ix86/ix86_mmx.cpp new file mode 100644 index 0000000000..77f8f33c97 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_mmx.cpp @@ -0,0 +1,584 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" +#include "ix86_internal.h" + +//------------------------------------------------------------------ +// MMX instructions +// +// note: r64 = mm +//------------------------------------------------------------------ + +/* movq m64 to r64 */ +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0x6F0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* movq r64 to m64 */ +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) +{ + write16( 0x7F0F ); + ModRM( 0, from, DISP32 ); + write32(MEMADDR(to, 4)); +} + +/* pand r64 to r64 */ +emitterT void PANDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xDB0F ); + ModRM( 3, to, from ); +} + +emitterT void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xDF0F ); + ModRM( 3, to, from ); +} + +/* por r64 to r64 */ +emitterT void PORRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xEB0F ); + ModRM( 3, to, from ); +} + +/* pxor r64 to r64 */ +emitterT void PXORRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xEF0F ); + ModRM( 3, to, from ); +} + +/* psllq r64 to r64 */ +emitterT void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xF30F ); + ModRM( 3, to, from ); +} + +/* psllq m64 to r64 */ +emitterT void PSLLQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xF30F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* psllq imm8 to r64 */ +emitterT void PSLLQItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x730F ); + ModRM( 3, 6, to); + write8( from ); +} + +/* psrlq r64 to r64 */ +emitterT void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xD30F ); + ModRM( 3, to, from ); +} + +/* psrlq m64 to r64 */ +emitterT void PSRLQMtoR( x86MMXRegType 
to, uptr from ) +{ + write16( 0xD30F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* psrlq imm8 to r64 */ +emitterT void PSRLQItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x730F ); + ModRM( 3, 2, to); + write8( from ); +} + +/* paddusb r64 to r64 */ +emitterT void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xDC0F ); + ModRM( 3, to, from ); +} + +/* paddusb m64 to r64 */ +emitterT void PADDUSBMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xDC0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* paddusw r64 to r64 */ +emitterT void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xDD0F ); + ModRM( 3, to, from ); +} + +/* paddusw m64 to r64 */ +emitterT void PADDUSWMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xDD0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* paddb r64 to r64 */ +emitterT void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xFC0F ); + ModRM( 3, to, from ); +} + +/* paddb m64 to r64 */ +emitterT void PADDBMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xFC0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* paddw r64 to r64 */ +emitterT void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xFD0F ); + ModRM( 3, to, from ); +} + +/* paddw m64 to r64 */ +emitterT void PADDWMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xFD0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* paddd r64 to r64 */ +emitterT void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xFE0F ); + ModRM( 3, to, from ); +} + +/* paddd m64 to r64 */ +emitterT void PADDDMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xFE0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* emms */ +emitterT void EMMS() +{ + write16( 0x770F ); +} + +emitterT void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xEC0F ); + ModRM( 3, to, from ); +} + +emitterT 
void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xED0F ); + ModRM( 3, to, from ); +} + +// paddq m64 to r64 (sse2 only?) +emitterT void PADDQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xD40F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +// paddq r64 to r64 (sse2 only?) +emitterT void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xD40F ); + ModRM( 3, to, from ); +} + +emitterT void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xE80F ); + ModRM( 3, to, from ); +} + +emitterT void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xE90F ); + ModRM( 3, to, from ); +} + + +emitterT void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xF80F ); + ModRM( 3, to, from ); +} + +emitterT void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xF90F ); + ModRM( 3, to, from ); +} + +emitterT void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xFA0F ); + ModRM( 3, to, from ); +} + +emitterT void PSUBDMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xFA0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xD80F ); + ModRM( 3, to, from ); +} + +emitterT void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xD90F ); + ModRM( 3, to, from ); +} + +// psubq m64 to r64 (sse2 only?) +emitterT void PSUBQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xFB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +// psubq r64 to r64 (sse2 only?) +emitterT void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xFB0F ); + ModRM( 3, to, from ); +} + +// pmuludq m64 to r64 (sse2 only?) +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xF40F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +// pmuludq r64 to r64 (sse2 only?) 
+emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xF40F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x740F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x750F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x760F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0x760F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x640F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x650F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x660F ); + ModRM( 3, to, from ); +} + +emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0x660F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void PSRLWItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x710F ); + ModRM( 3, 2 , to ); + write8( from ); +} + +emitterT void PSRLDItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x720F ); + ModRM( 3, 2 , to ); + write8( from ); +} + +emitterT void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xD20F ); + ModRM( 3, to, from ); +} + +emitterT void PSLLWItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x710F ); + ModRM( 3, 6 , to ); + write8( from ); +} + +emitterT void PSLLDItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x720F ); + ModRM( 3, 6 , to ); + write8( from ); +} + +emitterT void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xF20F ); + ModRM( 3, to, from ); +} + +emitterT void PSRAWItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x710F ); + ModRM( 3, 4 , to ); + write8( from ); +} + +emitterT void 
PSRADItoR( x86MMXRegType to, u8 from ) +{ + write16( 0x720F ); + ModRM( 3, 4 , to ); + write8( from ); +} + +emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0xE20F ); + ModRM( 3, to, from ); +} + +/* por m64 to r64 */ +emitterT void PORMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xEB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* pxor m64 to r64 */ +emitterT void PXORMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xEF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* pand m64 to r64 */ +emitterT void PANDMtoR( x86MMXRegType to, uptr from ) +{ + //u64 rip = (u64)x86Ptr + 7; + write16( 0xDB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void PANDNMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0xDF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x6A0F ); + ModRM( 3, to, from ); +} + +emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0x6A0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x620F ); + ModRM( 3, to, from ); +} + +emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) +{ + write16( 0x620F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void MOVQ64ItoR( x86MMXRegType reg, u64 i ) +{ + MOVQMtoR( reg, ( uptr )(x86Ptr) + 2 + 7 ); + JMP8( 8 ); + write64( i ); +} + +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + write16( 0x6F0F ); + ModRM( 3, to, from ); +} + +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) +{ + write16( 0x6F0F ); + WriteRmOffsetFrom( to, from, offset ); +} + +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) +{ + write16( 0x7F0F ); + WriteRmOffsetFrom( from, to, offset ); +} + +/* movd m32 to r64 */ 
+emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) +{ + write16( 0x6E0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +/* movd r64 to m32 */ +emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) +{ + write16( 0x7E0F ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); +} + +emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) +{ + write16( 0x6E0F ); + ModRM( 3, to, from ); +} + +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) +{ + write16( 0x6E0F ); + WriteRmOffsetFrom( to, from, offset ); +} + +emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) +{ + write16( 0x7E0F ); + ModRM( 3, from, to ); +} + +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) +{ + write16( 0x7E0F ); + WriteRmOffsetFrom( from, to, offset ); +} + +// untested +emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +{ + write16( 0x630F ); + ModRM( 3, to, from ); +} + +emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +{ + write16( 0x6B0F ); + ModRM( 3, to, from ); +} + +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) +{ + write16( 0xD70F ); + ModRM( 3, to, from ); +} + +emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) +{ + if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); + write16( 0xc40f ); + ModRM( 3, to, from ); + write8( imm8 ); +} + +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) +{ + write16(0x700f); + ModRM( 3, to, from ); + write8(imm8); +} + +emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) +{ + write16( 0x700f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); + write8(imm8); +} + +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) +{ + write16(0xf70f); + ModRM( 3, to, from ); +} diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.inl deleted file mode 100644 index 18126cd6e1..0000000000 --- 
a/pcsx2/x86/ix86/ix86_mmx.inl +++ /dev/null @@ -1,647 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -//------------------------------------------------------------------ -// MMX instructions -// -// note: r64 = mm -//------------------------------------------------------------------ - -/* movq m64 to r64 */ -emitterT void eMOVQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x6F0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movq r64 to m64 */ -emitterT void eMOVQRtoM( uptr to, x86MMXRegType from ) -{ - write16( 0x7F0F ); - ModRM( 0, from, DISP32 ); - write32(MEMADDR(to, 4)); -} - -/* pand r64 to r64 */ -emitterT void ePANDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDB0F ); - ModRM( 3, to, from ); -} - -emitterT void ePANDNRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDF0F ); - ModRM( 3, to, from ); -} - -/* por r64 to r64 */ -emitterT void ePORRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xEB0F ); - ModRM( 3, to, from ); -} - -/* pxor r64 to r64 */ -emitterT void ePXORRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xEF0F ); - ModRM( 3, to, from ); -} - -/* psllq r64 to r64 */ -emitterT void ePSLLQRtoR( 
x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF30F ); - ModRM( 3, to, from ); -} - -/* psllq m64 to r64 */ -emitterT void ePSLLQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xF30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* psllq imm8 to r64 */ -emitterT void ePSLLQItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x730F ); - ModRM( 3, 6, to); - write8( from ); -} - -/* psrlq r64 to r64 */ -emitterT void ePSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD30F ); - ModRM( 3, to, from ); -} - -/* psrlq m64 to r64 */ -emitterT void ePSRLQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xD30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* psrlq imm8 to r64 */ -emitterT void ePSRLQItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x730F ); - ModRM( 3, 2, to); - write8( from ); -} - -/* paddusb r64 to r64 */ -emitterT void ePADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDC0F ); - ModRM( 3, to, from ); -} - -/* paddusb m64 to r64 */ -emitterT void ePADDUSBMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xDC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddusw r64 to r64 */ -emitterT void ePADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDD0F ); - ModRM( 3, to, from ); -} - -/* paddusw m64 to r64 */ -emitterT void ePADDUSWMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xDD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddb r64 to r64 */ -emitterT void ePADDBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFC0F ); - ModRM( 3, to, from ); -} - -/* paddb m64 to r64 */ -emitterT void ePADDBMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddw r64 to r64 */ -emitterT void ePADDWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFD0F ); - ModRM( 3, to, from ); -} - -/* paddw m64 to r64 */ -emitterT void 
ePADDWMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddd r64 to r64 */ -emitterT void ePADDDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFE0F ); - ModRM( 3, to, from ); -} - -/* paddd m64 to r64 */ -emitterT void ePADDDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* emms */ -emitterT void eEMMS() -{ - write16( 0x770F ); -} - -emitterT void ePADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xEC0F ); - ModRM( 3, to, from ); -} - -emitterT void ePADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xED0F ); - ModRM( 3, to, from ); -} - -// paddq m64 to r64 (sse2 only?) -emitterT void ePADDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xD40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// paddq r64 to r64 (sse2 only?) -emitterT void ePADDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD40F ); - ModRM( 3, to, from ); -} - -emitterT void ePSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xE80F ); - ModRM( 3, to, from ); -} - -emitterT void ePSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xE90F ); - ModRM( 3, to, from ); -} - - -emitterT void ePSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF80F ); - ModRM( 3, to, from ); -} - -emitterT void ePSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF90F ); - ModRM( 3, to, from ); -} - -emitterT void ePSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFA0F ); - ModRM( 3, to, from ); -} - -emitterT void ePSUBDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFA0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void ePSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD80F ); - ModRM( 3, to, from ); -} - -emitterT void ePSUBUSWRtoR( x86MMXRegType to, x86MMXRegType 
from ) -{ - write16( 0xD90F ); - ModRM( 3, to, from ); -} - -// psubq m64 to r64 (sse2 only?) -emitterT void ePSUBQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// psubq r64 to r64 (sse2 only?) -emitterT void ePSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFB0F ); - ModRM( 3, to, from ); -} - -// pmuludq m64 to r64 (sse2 only?) -emitterT void ePMULUDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xF40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// pmuludq r64 to r64 (sse2 only?) -emitterT void ePMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF40F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x740F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x750F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x760F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPEQDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x760F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void ePCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x640F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x650F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x660F ); - ModRM( 3, to, from ); -} - -emitterT void ePCMPGTDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x660F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void ePSRLWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( from ); -} - -emitterT void ePSRLDItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( from ); -} - 
-emitterT void ePSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD20F ); - ModRM( 3, to, from ); -} - -emitterT void ePSLLWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( from ); -} - -emitterT void ePSLLDItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( from ); -} - -emitterT void ePSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF20F ); - ModRM( 3, to, from ); -} - -emitterT void ePSRAWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( from ); -} - -emitterT void ePSRADItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( from ); -} - -emitterT void ePSRADRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xE20F ); - ModRM( 3, to, from ); -} - -/* por m64 to r64 */ -emitterT void ePORMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xEB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* pxor m64 to r64 */ -emitterT void ePXORMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xEF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* pand m64 to r64 */ -emitterT void ePANDMtoR( x86MMXRegType to, uptr from ) -{ - //u64 rip = (u64)x86Ptr[0] + 7; - write16( 0xDB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void ePANDNMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xDF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void ePUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x6A0F ); - ModRM( 3, to, from ); -} - -emitterT void ePUNPCKHDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x6A0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void ePUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x620F ); - ModRM( 3, to, from ); -} - -emitterT void ePUNPCKLDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x620F 
); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void eMOVQ64ItoR( x86MMXRegType reg, u64 i ) -{ - eMOVQMtoR( reg, ( uptr )(x86Ptr[0]) + 2 + 7 ); - eJMP8( 8 ); - write64( i ); -} - -emitterT void eMOVQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x6F0F ); - ModRM( 3, to, from ); -} - -emitterT void eMOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ) -{ - write16( 0x6F0F ); - - if( offset < 128 ) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } -} - -emitterT void eMOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) -{ - write16( 0x7F0F ); - - if( offset < 128 ) { - ModRM( 1, from , to ); - write8(offset); - } - else { - ModRM( 2, from, to ); - write32(offset); - } -} - -/* movd m32 to r64 */ -emitterT void eMOVDMtoMMX( x86MMXRegType to, uptr from ) -{ - write16( 0x6E0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movd r64 to m32 */ -emitterT void eMOVDMMXtoM( uptr to, x86MMXRegType from ) -{ - write16( 0x7E0F ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -emitterT void eMOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) -{ - write16( 0x6E0F ); - ModRM( 3, to, from ); -} - -emitterT void eMOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) -{ - write16( 0x6E0F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) -{ - write16( 0x6E0F ); - - if( offset < 128 ) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } -} - -emitterT void eMOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) -{ - write16( 0x7E0F ); - ModRM( 3, from, to ); -} - -emitterT void eMOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) -{ - write16( 0x7E0F ); - ModRM( 0, from, to ); - if( to >= 4 ) { - // no idea why - assert( to == ESP ); - write8(0x24); - } - -} - -emitterT void eMOVD32MMXtoRmOffset( 
x86IntRegType to, x86MMXRegType from, u32 offset ) -{ - write16( 0x7E0F ); - - if( offset < 128 ) { - ModRM( 1, from, to ); - write8(offset); - } - else { - ModRM( 2, from, to ); - write32(offset); - } -} - -///* movd r32 to r64 */ -//emitterT void eMOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) -//{ -// write16( 0x6E0F ); -// ModRM( 3, to, from ); -//} -// -///* movq r64 to r32 */ -//emitterT void eMOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) -//{ -// write16( 0x7E0F ); -// ModRM( 3, from, to ); -//} - -// untested -emitterT void ePACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) -{ - write16( 0x630F ); - ModRM( 3, to, from ); -} - -emitterT void ePACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) -{ - write16( 0x6B0F ); - ModRM( 3, to, from ); -} - -emitterT void ePMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) -{ - write16( 0xD70F ); - ModRM( 3, to, from ); -} - -emitterT void ePINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) -{ - if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); - write16( 0xc40f ); - ModRM( 3, to, from ); - write8( imm8 ); -} - -emitterT void ePSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) -{ - write16(0x700f); - ModRM( 3, to, from ); - write8(imm8); -} - -emitterT void ePSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) -{ - write16( 0x700f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); - write8(imm8); -} - -emitterT void eMASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) -{ - write16(0xf70f); - ModRM( 3, to, from ); -} diff --git a/pcsx2/x86/ix86/ix86_sse.cpp b/pcsx2/x86/ix86/ix86_sse.cpp new file mode 100644 index 0000000000..ffeb51365b --- /dev/null +++ b/pcsx2/x86/ix86/ix86_sse.cpp @@ -0,0 +1,1561 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of 
the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" +#include "ix86_internal.h" +#include "ix86_sse_helpers.h" + +////////////////////////////////////////////////////////////////////////////////////////// +// AlwaysUseMovaps [const] +// +// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions +// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache +// and some marginal speed gains as a result. (it's possible someday in the future the per- +// formance of the two instructions could change, so this constant is provided to restore MOVDQA +// use easily at a later time, if needed). 
+// +static const bool AlwaysUseMovaps = true; + + +//------------------------------------------------------------------ +// SSE instructions +//------------------------------------------------------------------ + +#define SSEMtoR( code, overb ) \ + assert( to < XMMREGS ), \ + RexR(0, to), \ + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) + +#define SSERtoM( code, overb ) \ + assert( from < XMMREGS), \ + RexR(0, from), \ + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) + +#define SSE_SS_MtoR( code, overb ) \ + assert( to < XMMREGS ), \ + write8( 0xf3 ), \ + RexR(0, to), \ + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) + +#define SSE_SS_RtoM( code, overb ) \ + assert( from < XMMREGS), \ + write8( 0xf3 ), \ + RexR(0, from), \ + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) + +#define SSERtoR( code ) \ + assert( to < XMMREGS && from < XMMREGS), \ + RexRB(0, to, from), \ + write16( code ), \ + ModRM( 3, to, from ) + +#define SSEMtoR66( code ) \ + write8( 0x66 ), \ + SSEMtoR( code, 0 ) + +#define SSERtoM66( code ) \ + write8( 0x66 ), \ + SSERtoM( code, 0 ) + +#define SSERtoR66( code ) \ + write8( 0x66 ), \ + SSERtoR( code ) + +#define _SSERtoR66( code ) \ + assert( to < XMMREGS && from < XMMREGS), \ + write8( 0x66 ), \ + RexRB(0, from, to), \ + write16( code ), \ + ModRM( 3, from, to ) + +#define SSE_SS_RtoR( code ) \ + assert( to < XMMREGS && from < XMMREGS), \ + write8( 0xf3 ), \ + RexRB(0, to, from), \ + write16( code ), \ + ModRM( 3, to, from ) + +#define SSE_SD_MtoR( code, overb ) \ + assert( to < XMMREGS ) , \ + write8( 0xf2 ), \ + RexR(0, to), \ + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) \ + +#define SSE_SD_RtoM( code, overb ) \ + assert( from < XMMREGS) , \ + write8( 0xf2 ), \ + RexR(0, from), \ + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( 
MEMADDR(to, 4 + overb) ) \ + +#define SSE_SD_RtoR( code ) \ + assert( to < XMMREGS && from < XMMREGS) , \ + write8( 0xf2 ), \ + RexRB(0, to, from), \ + write16( code ), \ + ModRM( 3, to, from ) + +#define CMPPSMtoR( op ) \ + SSEMtoR( 0xc20f, 1 ), \ + write8( op ) + +#define CMPPSRtoR( op ) \ + SSERtoR( 0xc20f ), \ + write8( op ) + +#define CMPSSMtoR( op ) \ + SSE_SS_MtoR( 0xc20f, 1 ), \ + write8( op ) + +#define CMPSSRtoR( op ) \ + SSE_SS_RtoR( 0xc20f ), \ + write8( op ) + +#define CMPSDMtoR( op ) \ + SSE_SD_MtoR( 0xc20f, 1 ), \ + write8( op ) + +#define CMPSDRtoR( op ) \ + SSE_SD_RtoR( 0xc20f ), \ + write8( op ) + +/* movups [r32][r32*scale] to xmm1 */ +emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + RexRXB(0, to, from2, from); + write16( 0x100f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +/* movups xmm1 to [r32][r32*scale] */ +emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + RexRXB(1, to, from2, from); + write16( 0x110f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +/* movups [r32] to r32 */ +emitterT void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x100f ); + ModRM( 0, to, from ); +} + +/* movups r32 to [r32] */ +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, from, to); + write16( 0x110f ); + ModRM( 0, from, to ); +} + +/* movlps [r32] to r32 */ +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) +{ + RexRB(1, to, from); + write16( 0x120f ); + ModRM( 0, to, from ); +} + +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); +} + +/* movaps r32 to [r32] */ +emitterT void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, from, to); + write16( 0x130f ); + ModRM( 0, from, to ); +} + 
+emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, from, to); + write16( 0x130f ); + WriteRmOffsetFrom(from, to, offset); +} + +/* movaps [r32][r32*scale] to xmm1 */ +emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + assert( from != EBP ); + RexRXB(0, to, from2, from); + write16( 0x280f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +/* movaps xmm1 to [r32][r32*scale] */ +emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + assert( from != EBP ); + RexRXB(0, to, from2, from); + write16( 0x290f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +// movaps [r32+offset] to r32 +emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write16( 0x280f ); + WriteRmOffsetFrom(to, from, offset); +} + +// movaps r32 to [r32+offset] +emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) +{ + RexRB(0, from, to); + write16( 0x290f ); + WriteRmOffsetFrom(from, to, offset); +} + +// movdqa [r32+offset] to r32 +emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( AlwaysUseMovaps ) + SSE_MOVAPSRmtoR( to, from, offset ); + else + { + write8(0x66); + RexRB(0, to, from); + write16( 0x6f0f ); + WriteRmOffsetFrom(to, from, offset); + } +} + +// movdqa r32 to [r32+offset] +emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( AlwaysUseMovaps ) + SSE_MOVAPSRtoRm( to, from, offset ); + else + { + write8(0x66); + RexRB(0, from, to); + write16( 0x7f0f ); + WriteRmOffsetFrom(from, to, offset); + } +} + +// movups [r32+offset] to r32 +emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); +} + +// movups r32 to [r32+offset] +emitterT void 
SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) +{ + RexRB(0, from, to); + write16( 0x110f ); + WriteRmOffsetFrom(from, to, offset); +} + +//**********************************************************************************/ +//MOVAPS: Move aligned Packed Single Precision FP values * +//********************************************************************************** +emitterT void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } +emitterT void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } +emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSERtoR( 0x280f ); } } + +emitterT void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } +emitterT void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } + +emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } +emitterT void SSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } +emitterT void SSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } + +emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) +{ + write8(0xf3); SSEMtoR( 0x7e0f, 0); +} + +emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + write8(0xf3); SSERtoR( 0x7e0f); +} + +emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) +{ + SSERtoM66(0xd60f); +} + +emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) +{ + write8(0xf2); + SSERtoR( 0xd60f); +} +emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) +{ + write8(0xf3); + SSERtoR( 0xd60f); +} + +//**********************************************************************************/ +//MOVSS: Move Scalar Single-Precision FP value * +//********************************************************************************** +emitterT void 
SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } +emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } + +emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } + +emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + write8(0xf3); + RexRB(0, to, from); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); +} + +emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + write8(0xf3); + RexRB(0, from, to); + write16(0x110f); + WriteRmOffsetFrom(from, to, offset); +} + +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } +//**********************************************************************************/ +//MOVLPS: Move low Packed Single-Precision FP * +//********************************************************************************** +emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } +emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } + +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); +} + +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + RexRB(0, from, to); + write16(0x130f); + WriteRmOffsetFrom(from, to, offset); +} + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVHPS: Move High Packed Single-Precision FP * +//********************************************************************************** +emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } +emitterT void 
SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } + +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write16( 0x160f ); + WriteRmOffsetFrom(to, from, offset); +} + +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + RexRB(0, from, to); + write16(0x170f); + WriteRmOffsetFrom(from, to, offset); +} + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVLHPS: Moved packed Single-Precision FP low to high * +//********************************************************************************** +emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } + +////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVHLPS: Moved packed Single-Precision FP High to Low * +//********************************************************************************** +emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } + +/////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ANDPS: Logical Bit-wise AND for Single FP * +//********************************************************************************** +emitterT void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } +emitterT void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } + +emitterT void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } +emitterT void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } + 
+/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * +//********************************************************************************** +emitterT void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } +emitterT void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } + +emitterT void SSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } +emitterT void SSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//RCPPS : Packed Single-Precision FP Reciprocal * +//********************************************************************************** +emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } +emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } + +emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } +emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } + +////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ORPS : Bit-wise Logical OR of Single-Precision FP Data * +//********************************************************************************** +emitterT void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } +emitterT void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } + +emitterT void SSE2_ORPD_M128_to_XMM( x86SSERegType to, 
uptr from ) { SSEMtoR66( 0x560f ); } +emitterT void SSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//XORPS : Bitwise Logical XOR of Single-Precision FP Values * +//********************************************************************************** +emitterT void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } +emitterT void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } + +emitterT void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } +emitterT void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ADDPS : ADD Packed Single-Precision FP Values * +//********************************************************************************** +emitterT void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } +emitterT void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } + +//////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ADDSS : ADD Scalar Single-Precision FP Values * +//********************************************************************************** +emitterT void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } +emitterT void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } + +emitterT void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } +emitterT void 
SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } + +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//SUBPS: Packed Single-Precision FP Subtract * +//********************************************************************************** +emitterT void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } +emitterT void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//SUBSS : Scalar Single-Precision FP Subtract * +//********************************************************************************** +emitterT void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } +emitterT void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } + +emitterT void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } +emitterT void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } + +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MULPS : Packed Single-Precision FP Multiply * +//********************************************************************************** +emitterT void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } +emitterT void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } + +//////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ 
//MULSS : Scalar Single-Precision FP Multiply                                       *
//**********************************************************************************
// NOTE(review): SSE_SS_MtoR/SSE_SS_RtoR (and the SSEMtoR/SSERtoR family below) are
// opcode-emission helpers defined elsewhere in this emitter; presumably they write
// the F3/F2/none prefix plus the two-byte opcode given — confirm against their
// definitions before changing any opcode constant here.
emitterT void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); }
emitterT void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); }

// Scalar double-precision multiply (MULSD, same 0F 59 opcode with the SD prefix).
emitterT void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); }
emitterT void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); }

////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//Packed Single-Precision FP compare (CMPccPS)                                     *
//**********************************************************************************
// The integer argument is the CMPPS immediate predicate:
// 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD.
//missing  SSE_CMPPS_I8_to_XMM
//         SSE_CMPPS_M32_to_XMM
//         SSE_CMPPS_XMM_to_XMM
emitterT void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 0 ); }
emitterT void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); }
emitterT void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 1 ); }
emitterT void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); }
emitterT void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 2 ); }
emitterT void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); }
emitterT void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 3 ); }
emitterT void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); }
emitterT void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 4 ); }
emitterT void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); }
emitterT void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 5 ); }
emitterT void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); }
emitterT void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 6 ); }
emitterT void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); }
emitterT void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from )         { CMPPSMtoR( 7 ); }
emitterT void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); }

///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//Scalar Single-Precision FP compare (CMPccSS)                                     *
//**********************************************************************************
// Same immediate predicate table as CMPccPS above.
//missing  SSE_CMPSS_I8_to_XMM
//         SSE_CMPSS_M32_to_XMM
//         SSE_CMPSS_XMM_to_XMM
emitterT void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 0 ); }
emitterT void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); }
emitterT void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 1 ); }
emitterT void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); }
emitterT void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 2 ); }
emitterT void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); }
emitterT void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 3 ); }
emitterT void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); }
emitterT void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 4 ); }
emitterT void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); }
emitterT void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 5 ); }
emitterT void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); }
// Remaining CMPccSS predicates (6=NLE, 7=ORD) — see the predicate table above.
emitterT void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 6 ); }
emitterT void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); }
emitterT void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from )         { CMPSSMtoR( 7 ); }
emitterT void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); }

// Scalar double-precision compares (CMPccSD), same immediate predicate table.
emitterT void SSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 0 ); }
emitterT void SSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); }
emitterT void SSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 1 ); }
emitterT void SSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); }
emitterT void SSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 2 ); }
emitterT void SSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); }
emitterT void SSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 3 ); }
emitterT void SSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); }
emitterT void SSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 4 ); }
emitterT void SSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); }
emitterT void SSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 5 ); }
emitterT void SSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); }
emitterT void SSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 6 ); }
emitterT void SSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); }
emitterT void SSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, uptr from )         { CMPSDMtoR( 7 ); }
emitterT void SSE2_CMPORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); }

// UCOMISS: unordered compare scalar single, sets EFLAGS (opcode 0F 2E).
// Memory form emits a disp32 ModRM; MEMADDR(from, 4) presumably resolves the
// absolute address/fixup — confirm against its definition.
emitterT void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from )
{
	RexR(0, to);
	write16( 0x2e0f );
	ModRM( 0, to, DISP32 );
	write32( MEMADDR(from, 4) );
}

emitterT void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	RexRB(0, to, from);
	write16( 0x2e0f );
	ModRM( 3, to, from );
}

// UCOMISD: same as above with the 0x66 operand-size prefix (66 0F 2E).
emitterT void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from )
{
	write8(0x66);
	RexR(0, to);
	write16( 0x2e0f );
	ModRM( 0, to, DISP32 );
	write32( MEMADDR(from, 4) );
}

emitterT void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	write8(0x66);
	RexRB(0, to, from);
	write16( 0x2e0f );
	ModRM( 3, to, from );
}

//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal                      *
//**********************************************************************************
emitterT void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x520f, 0 ); }
emitterT void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )  { SSERtoR( 0x520f ); }

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal                      *
//**********************************************************************************
emitterT void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from )           { SSE_SS_MtoR( 0x520f, 0 ); }
emitterT void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )  { SSE_SS_RtoR( 0x520f ); }

////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SQRTPS : Packed Single-Precision FP Square Root                                  *
//**********************************************************************************
emitterT void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR( 0x510f, 0 ); }
emitterT void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSERtoR( 0x510f ); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SQRTSS : Scalar Single-Precision FP Square Root                                  *
//**********************************************************************************
emitterT void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from )            { SSE_SS_MtoR( 0x510f, 0 ); }
emitterT void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSE_SS_RtoR( 0x510f ); }

emitterT void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from )           { SSE_SD_MtoR( 0x510f, 0 ); }
emitterT void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXPS: Return Packed Single-Precision FP Maximum                                 *
//**********************************************************************************
emitterT void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from )            { SSEMtoR( 0x5f0f, 0 ); }
emitterT void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSERtoR( 0x5f0f ); }

emitterT void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR66( 0x5f0f ); }
emitterT void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSERtoR66( 0x5f0f ); }

/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXSS: Return Scalar Single-Precision FP Maximum                                 *
//**********************************************************************************
emitterT void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from )             { SSE_SS_MtoR( 0x5f0f, 0 ); }
emitterT void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSE_SS_RtoR( 0x5f0f ); }

emitterT void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from )            { SSE_SD_MtoR( 0x5f0f, 0 ); }
emitterT void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSE_SD_RtoR( 0x5f0f ); }

/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion                     *
//**********************************************************************************
emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x2a0f, 0 ); }
emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from )  { SSERtoR( 0x2a0f ); }

///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion                     *
//**********************************************************************************
emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from )           { SSEMtoR( 0x2d0f, 0 ); }
emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from )  { SSERtoR( 0x2d0f ); }

// CVTTSS2SI (truncating scalar-single to int32): F3 0F 2C.
emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); }
emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	write8(0xf3);
	RexRB(0, to, from);
	write16(0x2c0f);
	ModRM(3, to, from);
}

// CVTSI2SS (int32 to scalar-single): F3 0F 2A.
emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); }
emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0xf3);
	RexRB(0, to, from);
	write16(0x2a0f);
	ModRM(3, to, from);
}

// Scalar single <-> double conversions (opcode 0F 5A with SS/SD prefix).
emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from)          { SSE_SS_MtoR(0x5a0f, 0); }
emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); }

emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from)          { SSE_SD_MtoR(0x5a0f, 0); }
emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); }

///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion           *
//**********************************************************************************
emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR( 0x5b0f, 0 ); }
emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); }

//**********************************************************************************/
//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion           *
//**********************************************************************************
emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0x5b0f ); }
emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); }

// Truncating variant: F3 0F 5B.
emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); }
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINPS: Return Packed Single-Precision FP Minimum                                 *
//**********************************************************************************
emitterT void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from )            { SSEMtoR( 0x5d0f, 0 ); }
emitterT void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSERtoR( 0x5d0f ); }

emitterT void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR66( 0x5d0f ); }
emitterT void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSERtoR66( 0x5d0f ); }

//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINSS: Return Scalar Single-Precision FP Minimum                                 *
//**********************************************************************************
emitterT void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from )             { SSE_SS_MtoR( 0x5d0f, 0 ); }
emitterT void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSE_SS_RtoR( 0x5d0f ); }

emitterT void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from )            { SSE_SD_MtoR( 0x5d0f, 0 ); }
emitterT void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSE_SD_RtoR( 0x5d0f ); }

///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMAXSW: Packed Signed Integer Word Maximum                                       *
//**********************************************************************************
//missing  SSE_PMAXSW_M64_to_MM
//         SSE2_PMAXSW_M128_to_XMM
//         SSE2_PMAXSW_XMM_to_XMM
emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }

///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMINSW: Packed Signed Integer Word Minimum                                       *
//**********************************************************************************
//missing  SSE_PMINSW_M64_to_MM
//         SSE2_PMINSW_M128_to_XMM
//         SSE2_PMINSW_XMM_to_XMM
emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SHUFPS: Shuffle Packed Single-Precision FP Values                                *
//**********************************************************************************
// imm8 is the shuffle selector appended after the instruction (opcode 0F C6).
emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); }
emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )         { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); }

// Register-indirect (base + offset) source form.
emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 )
{
	RexRB(0, to, from);
	write16(0xc60f);
	WriteRmOffsetFrom(to, from, offset);
	write8(imm8);
}

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SHUFPD: Shuffle Packed Double-Precision FP Values                                *
//**********************************************************************************
emitterT void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); }
emitterT void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )         { SSEMtoR66( 0xC60F ); write8( imm8 ); }

////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PSHUFD: Shuffle Packed DoubleWords                                               *
//**********************************************************************************
emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
{
	SSERtoR66( 0x700F );
	write8( imm8 );
}
emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); }

// PSHUFLW uses the F2 prefix, PSHUFHW the F3 prefix (both opcode 0F 70).
emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); }
emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )         { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); }
emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); }
emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 )         { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); }

///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data              *
//**********************************************************************************
emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from )         { SSEMtoR(0x140f, 0); }
emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data             *
//**********************************************************************************
emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from )         { SSEMtoR(0x150f, 0); }
emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//DIVPS : Packed Single-Precision FP Divide                                        *
//**********************************************************************************
emitterT void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from )            { SSEMtoR( 0x5e0F, 0 ); }
emitterT void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSERtoR( 0x5e0F ); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//DIVSS : Scalar Single-Precision FP Divide                                        *
//**********************************************************************************
emitterT void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from )             { SSE_SS_MtoR( 0x5e0F, 0 ); }
emitterT void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSE_SS_RtoR( 0x5e0F ); }

emitterT void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from )            { SSE_SD_MtoR( 0x5e0F, 0 ); }
emitterT void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSE_SD_RtoR( 0x5e0F ); }

/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//STMXCSR : Store Streaming SIMD Extension Control/Status                          *
//**********************************************************************************
// 0F AE /3 — the /3 subopcode goes in the ModRM reg field.
emitterT void SSE_STMXCSR( uptr from ) {
	write16( 0xAE0F );
	ModRM( 0, 0x3, DISP32 );
	write32( MEMADDR(from, 4) );
}

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//LDMXCSR : Load Streaming SIMD Extension Control/Status                           *
//**********************************************************************************
// 0F AE /2.
emitterT void SSE_LDMXCSR( uptr from ) {
	write16( 0xAE0F );
	ModRM( 0, 0x2, DISP32 );
	write32( MEMADDR(from, 4) );
}

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PADDB,PADDW,PADDD : Add Packed Integers                                          *
//**********************************************************************************
emitterT void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); }
emitterT void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); }
emitterT void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); }
emitterT void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); }
emitterT void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); }
emitterT void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); }
emitterT void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); }
emitterT void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); }

///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PCMPxx: Compare Packed Integers                                                  *
//**********************************************************************************
emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); }
emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); }
emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); }
emitterT void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); }
emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); }
emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); }
emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); }
emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); }
emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); }
emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); }
emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); }

////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PEXTRW,PINSRW: Packed Extract/Insert Word                                        *
//**********************************************************************************
emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }

////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PSUBx: Subtract Packed Integers                                                  *
//**********************************************************************************
emitterT void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); }
emitterT void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); }
emitterT void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); }
emitterT void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); }
emitterT void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); }
emitterT void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); }
emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); }
emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); }

///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVD: Move Dword(32bit) to /from XMM reg                                         *
//**********************************************************************************
emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR66(0x6E0F); }
emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from )    { SSERtoR66(0x6E0F); }

// Load from [from] (register-indirect, no displacement).
// NOTE(review): mod=0 with an ESP/EBP base would need SIB/disp handling — presumably
// callers never pass those registers here; confirm against call sites.
emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
{
	write8(0x66);
	RexRB(0, to, from);
	write16( 0x6e0f );
	ModRM( 0, to, from);
}

// Load from [from + offset] — overload taking an explicit displacement.
emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	write8(0x66);
	RexRB(0, to, from);
	write16( 0x6e0f );
	WriteRmOffsetFrom(to, from, offset);
}

emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from )            { SSERtoM66(0x7E0F); }
emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from )    { _SSERtoR66(0x7E0F); }

// Store low dword of 'from' to [to + offset]; note reg/rm operands are swapped
// relative to the load forms (store direction of 0F 7E).
emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset )
{
	write8(0x66);
	RexRB(0, from, to);
	write16( 0x7e0f );
	WriteRmOffsetFrom(from, to, offset);
}

////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//POR : SSE Bitwise OR                                                             *
//**********************************************************************************
emitterT void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )   { SSERtoR66( 0xEB0F ); }
emitterT void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR66( 0xEB0F ); }

// logical and: to &= from
emitterT void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )  { SSERtoR66( 0xDB0F ); }
emitterT void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR66( 0xDB0F ); }

// to = (~to) & from
emitterT void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); }
emitterT void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from )         { SSEMtoR66( 0xDF0F ); }

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PXOR : SSE Bitwise XOR                                                           *
//**********************************************************************************
emitterT void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )  { SSERtoR66( 0xEF0F ); }
emitterT void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR66( 0xEF0F ); }
///////////////////////////////////////////////////////////////////////////////////////

// MOVDQA: when AlwaysUseMovaps is set the equivalent MOVAPS encoding is emitted
// instead (same 128-bit aligned move semantics, shorter encoding — no 0x66 prefix).
emitterT void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from)          { if( AlwaysUseMovaps ) SSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); }
emitterT void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )        { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); }
// Reg-to-reg move is skipped entirely when source and destination match.
emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); }

// MOVDQU (unaligned): F3 0F 6F / F3 0F 7F, or MOVUPS under AlwaysUseMovaps.
emitterT void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
	if( AlwaysUseMovaps )
		SSE_MOVUPS_M128_to_XMM( to, from );
	else
	{
		write8(0xF3);
		SSEMtoR(0x6F0F, 0);
	}
}
emitterT void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from)
{
	if( AlwaysUseMovaps )
		SSE_MOVUPS_XMM_to_M128( to, from );
	else
	{
		write8(0xF3);
		SSERtoM(0x7F0F, 0);
	}
}

// shift right logical
// Immediate forms encode the shift group subopcode (/2 = logical right) in the
// ModRM reg field, with the target register in the rm field.

emitterT void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xD10F); }
emitterT void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xD10F); }
emitterT void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 2 , to );
	write8( imm8 );
}

emitterT void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xD20F); }
emitterT void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xD20F); }
emitterT void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 2 , to );
	write8( imm8 );
}

emitterT void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xD30F); }
emitterT void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xD30F); }
emitterT void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 2 , to );
	write8( imm8 );
}

// Byte shift of the whole register (PSRLDQ, /3).
emitterT void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 3 , to );
	write8( imm8 );
}

// shift right arithmetic (/4)

emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xE10F); }
emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xE10F); }
emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 4 , to );
	write8( imm8 );
}

emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xE20F); }
emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xE20F); }
emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 4 , to );
	write8( imm8 );
}

// shift left logical (/6)

emitterT void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xF10F); }
emitterT void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xF10F); }
emitterT void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 6 , to );
	write8( imm8 );
}

emitterT void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xF20F); }
emitterT void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xF20F); }
emitterT void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 6 , to );
	write8( imm8 );
}

emitterT void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)   { SSERtoR66(0xF30F); }
emitterT void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from)           { SSEMtoR66(0xF30F); }
emitterT void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 6 , to );
	write8( imm8 );
}

// Byte shift left of the whole register (PSLLDQ, /7).
emitterT void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 7 , to );
	write8( imm8 );
}

// Packed integer min/max (signed word, unsigned byte variants).
emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEE0F ); }
emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xEE0F ); }

emitterT void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDE0F ); }
emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xDE0F ); }

emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEA0F ); }
emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xEA0F ); }

emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDA0F ); }
emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xDA0F ); }

// Saturating packed add/subtract (signed then unsigned forms).
emitterT void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEC0F ); }
emitterT void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xEC0F ); }

emitterT void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xED0F ); }
emitterT void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xED0F ); }

emitterT void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE80F ); }
emitterT void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xE80F ); }

emitterT void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE90F ); }
emitterT void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from )        { SSEMtoR66( 0xE90F ); }

emitterT void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xD80F ); }
emitterT void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from )       { SSEMtoR66( 0xD80F ); }
emitterT void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xD90F ); }
emitterT void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from )       { SSEMtoR66( 0xD90F ); }

emitterT void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDC0F ); }
emitterT void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from )       { SSEMtoR66( 0xDC0F ); }
emitterT void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDD0F ); }
emitterT void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from )       { SSEMtoR66( 0xDD0F ); }

//**********************************************************************************/
//PACKSSWB,PACKSSDW: Pack with Signed Saturation
//**********************************************************************************
emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); }
emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from)         { SSEMtoR66( 0x630F ); }
emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); }
emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from)         { SSEMtoR66( 0x6B0F ); }

// PACKUSWB: pack words to bytes with unsigned saturation.
emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); }
emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from)         { SSEMtoR66( 0x670F ); }

+//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } +emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } + +emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } +emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } + +emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } +emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } +emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } +emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } + +emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } +emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } +emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } +emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } + +emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } +emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } + +emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } +emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } + +emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } +emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { 
SSEMtoR66( 0xD50F ); } +emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } +emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } + +emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } +emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } + +emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } + +emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } +emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } + +emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } + +emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } +emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } + +emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { + write8(0xf3); + RexRB(0, to, from); + write16( 0x120f); + ModRM( 3, to, from ); +} + +emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } +emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } +emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } + +// SSSE3 + +emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x1C380F); + ModRM(3, to, from); +} + +emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x1D380F); + ModRM(3, to, from); +} + +emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, 
from); + write24(0x1E380F); + ModRM(3, to, from); +} + +emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x0F3A0F); + ModRM(3, to, from); + write8(imm8); +} + +emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x08380F); + ModRM(3, to, from); +} + +emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x09380F); + ModRM(3, to, from); +} + +emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x0A380F); + ModRM(3, to, from); +} + +// SSE4.1 + +emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + write24(0x403A0F); + ModRM(3, to, from); + write8(imm8); +} + +emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) +{ + write8(0x66); + write24(0x403A0F); + ModRM(0, to, DISP32); + write32(MEMADDR(from, 4)); + write8(imm8); +} + +emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x213A0F); + ModRM(3, to, from); + write8(imm8); +} + +emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x173A0F); + ModRM(3, to, from); + write8(imm8); +} + +emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x0C3A0F); + ModRM(3, to, from); + write8(imm8); +} + +emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x14380F); + ModRM(3, to, from); +} + +emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) +{ + write8(0x66); + RexR(0, to); + write24(0x14380F); + ModRM(0, to, 
DISP32); + write32(MEMADDR(from, 4)); +} + +emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x25380F); + ModRM(3, to, from); +} + +emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x35380F); + ModRM(3, to, from); +} + +emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x223A0F); + ModRM(3, to, from); + write8(imm8); +} + +emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x3D380F); + ModRM(3, to, from); +} + +emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x39380F); + ModRM(3, to, from); +} + +emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x3F380F); + ModRM(3, to, from); +} + +emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x3B380F); + ModRM(3, to, from); +} + +emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) +{ + write8(0x66); + RexR(0, to); + write24(0x3D380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); +} + +emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) +{ + write8(0x66); + RexR(0, to); + write24(0x39380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); +} + +emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) +{ + write8(0x66); + RexR(0, to); + write24(0x3F380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); +} + +emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) +{ + write8(0x66); + RexR(0, to); + write24(0x3B380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); +} + +emitterT void 
SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x28380F); + ModRM(3, to, from); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) +// This header should always be included *after* ix86.h. + +// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the +// overhead of dynarec instructions that use these, even thought the same check would +// have been done redundantly by the emitter function. + +emitterT void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from); + else SSE_MOVAPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from); + else SSE_MOVAPS_XMM_to_M128(to, from); +} + +emitterT void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from); + else SSE_MOVAPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); + else SSE_MOVAPSRmtoR(to, from, offset); +} + +emitterT void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); + else SSE_MOVAPSRtoRm(to, from, offset); +} + +emitterT void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from); + else SSE_MOVUPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ) +{ + if( 
!AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from); + else SSE_MOVUPS_XMM_to_M128(to, from); +} + +emitterT void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from); + else SSE_MOVSS_M32_to_XMM(to, from); +} + +emitterT void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from); + else SSE_MOVSS_XMM_to_M32(to, from); +} + +emitterT void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); + else SSE_MOVSS_Rm_to_XMM(to, from, offset); +} + +emitterT void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); + else SSE_MOVSS_XMM_to_Rm(to, from, offset); +} + +emitterT void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from); + else SSE_ORPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from); + else SSE_ORPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from); + else SSE_XORPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from); + else SSE_XORPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from); + else SSE_ANDPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, 
from); + else SSE_ANDPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from); + else SSE_ANDNPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from); + else SSE_ANDNPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); + else SSE_UNPCKLPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); + else SSE_UNPCKLPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); + else SSE_UNPCKHPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); + else SSE_UNPCKHPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) { + SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); + if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); + } + else { + SSE_MOVHLPS_XMM_to_XMM(to, from); + } +} diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl deleted file mode 100644 index 971a33af17..0000000000 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ /dev/null @@ -1,1413 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your 
option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -//------------------------------------------------------------------ -// SSE instructions -//------------------------------------------------------------------ - -// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions -// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache -// and some marginal speed gains as a result. (it's possible someday in the future the per- -// formance of the two instructions could change, so this constant is provided to restore MOVDQA -// use easily at a later time, if needed). 
- -static const bool AlwaysUseMovaps = true; - -#define SSEMtoR( code, overb ) \ - assert( to < XMMREGS ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) - -#define SSERtoM( code, overb ) \ - assert( from < XMMREGS), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) - -#define SSE_SS_MtoR( code, overb ) \ - assert( to < XMMREGS ), \ - write8( 0xf3 ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) - -#define SSE_SS_RtoM( code, overb ) \ - assert( from < XMMREGS), \ - write8( 0xf3 ), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) - -#define SSERtoR( code ) \ - assert( to < XMMREGS && from < XMMREGS), \ - RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) - -#define SSEMtoR66( code ) \ - write8( 0x66 ), \ - SSEMtoR( code, 0 ) - -#define SSERtoM66( code ) \ - write8( 0x66 ), \ - SSERtoM( code, 0 ) - -#define SSERtoR66( code ) \ - write8( 0x66 ), \ - SSERtoR( code ) - -#define _SSERtoR66( code ) \ - assert( to < XMMREGS && from < XMMREGS), \ - write8( 0x66 ), \ - RexRB(0, from, to), \ - write16( code ), \ - ModRM( 3, from, to ) - -#define SSE_SS_RtoR( code ) \ - assert( to < XMMREGS && from < XMMREGS), \ - write8( 0xf3 ), \ - RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) - -#define SSE_SD_MtoR( code, overb ) \ - assert( to < XMMREGS ) , \ - write8( 0xf2 ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) \ - -#define SSE_SD_RtoM( code, overb ) \ - assert( from < XMMREGS) , \ - write8( 0xf2 ), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) \ - -#define SSE_SD_RtoR( code ) \ - assert( to < XMMREGS && from < XMMREGS) , \ - write8( 0xf2 ), \ - RexRB(0, to, from), \ - write16( code ), \ - 
ModRM( 3, to, from ) - -#define CMPPSMtoR( op ) \ - SSEMtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPPSRtoR( op ) \ - SSERtoR( 0xc20f ), \ - write8( op ) - -#define CMPSSMtoR( op ) \ - SSE_SS_MtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPSSRtoR( op ) \ - SSE_SS_RtoR( 0xc20f ), \ - write8( op ) - -#define CMPSDMtoR( op ) \ - SSE_SD_MtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPSDRtoR( op ) \ - SSE_SD_RtoR( 0xc20f ), \ - write8( op ) - -/* movups [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - RexRXB(0, to, from2, from); - write16( 0x100f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -/* movups xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - RexRXB(1, to, from2, from); - write16( 0x110f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -/* movups [r32] to r32 */ -emitterT void eSSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, to, from); - write16( 0x100f ); - ModRM( 0, to, from ); -} - -/* movups r32 to [r32] */ -emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write16( 0x110f ); - ModRM( 0, from, to ); -} - -/* movlps [r32] to r32 */ -emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) -{ - RexRB(1, to, from); - write16( 0x120f ); - ModRM( 0, to, from ); -} - -emitterT void eSSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); -} - -/* movaps r32 to [r32] */ -emitterT void eSSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write16( 0x130f ); - ModRM( 0, from, to ); -} - -emitterT void eSSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, from, to); - write16( 0x130f ); - 
WriteRmOffsetFrom(from, to, offset); -} - -/* movaps [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - assert( from != EBP ); - RexRXB(0, to, from2, from); - write16( 0x280f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -/* movaps xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - assert( from != EBP ); - RexRXB(0, to, from2, from); - write16( 0x290f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -// movaps [r32+offset] to r32 -emitterT void eSSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x280f ); - WriteRmOffsetFrom(to, from, offset); -} - -// movaps r32 to [r32+offset] -emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16( 0x290f ); - WriteRmOffsetFrom(from, to, offset); -} - -// movdqa [r32+offset] to r32 -emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) -{ - if( AlwaysUseMovaps ) - eSSE_MOVAPSRmtoROffset( to, from, offset ); - else - { - write8(0x66); - RexRB(0, to, from); - write16( 0x6f0f ); - WriteRmOffsetFrom(to, from, offset); - } -} - -// movdqa r32 to [r32+offset] -emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - if( AlwaysUseMovaps ) - eSSE_MOVAPSRtoRmOffset( to, from, offset ); - else - { - write8(0x66); - RexRB(0, from, to); - write16( 0x7f0f ); - WriteRmOffsetFrom(from, to, offset); - } -} - -// movups [r32+offset] to r32 -emitterT void eSSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); -} - -// movups r32 to [r32+offset] -emitterT void eSSE_MOVUPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, 
from, to); - write16( 0x110f ); - WriteRmOffsetFrom(from, to, offset); -} - -//**********************************************************************************/ -//MOVAPS: Move aligned Packed Single Precision FP values * -//********************************************************************************** -emitterT void eSSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } -emitterT void eSSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } -emitterT void eSSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSERtoR( 0x280f ); } } - -emitterT void eSSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } -emitterT void eSSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } - -emitterT void eSSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } -emitterT void eSSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } -emitterT void eSSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } - -emitterT void eSSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) -{ - write8(0xf3); SSEMtoR( 0x7e0f, 0); -} - -emitterT void eSSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - write8(0xf3); SSERtoR( 0x7e0f); -} - -emitterT void eSSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) -{ - SSERtoM66(0xd60f); -} - -emitterT void eSSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) -{ - write8(0xf2); - SSERtoR( 0xd60f); -} -emitterT void eSSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) -{ - write8(0xf3); - SSERtoR( 0xd60f); -} - -//**********************************************************************************/ -//MOVSS: Move Scalar Single-Precision FP value * -//********************************************************************************** -emitterT void eSSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 
0x100f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - write8(0xf3); - RexRB(0, from, to); - write16(0x110f); - ModRM(0, from, to); -} - -emitterT void eSSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } - -emitterT void eSSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - write8(0xf3); - RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void eSSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - write8(0xf3); - RexRB(0, from, to); - write16(0x110f); - WriteRmOffsetFrom(from, to, offset); -} - -emitterT void eSSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } -//**********************************************************************************/ -//MOVLPS: Move low Packed Single-Precision FP * -//********************************************************************************** -emitterT void eSSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } -emitterT void eSSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } - -emitterT void eSSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void eSSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16(0x130f); - WriteRmOffsetFrom(from, to, offset); -} - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVHPS: Move High Packed Single-Precision FP * -//********************************************************************************** 
-emitterT void eSSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } -emitterT void eSSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } - -emitterT void eSSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x160f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void eSSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16(0x170f); - WriteRmOffsetFrom(from, to, offset); -} - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVLHPS: Moved packed Single-Precision FP low to high * -//********************************************************************************** -emitterT void eSSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVHLPS: Moved packed Single-Precision FP High to Low * -//********************************************************************************** -emitterT void eSSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } - -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ANDPS: Logical Bit-wise AND for Single FP * -//********************************************************************************** -emitterT void eSSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } -emitterT void eSSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } - -emitterT void eSSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f 
); } -emitterT void eSSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * -//********************************************************************************** -emitterT void eSSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } -emitterT void eSSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } - -emitterT void eSSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } -emitterT void eSSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RCPPS : Packed Single-Precision FP Reciprocal * -//********************************************************************************** -emitterT void eSSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } -emitterT void eSSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } - -emitterT void eSSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } -emitterT void eSSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ORPS : Bit-wise Logical OR of Single-Precision FP Data * -//********************************************************************************** -emitterT void eSSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } -emitterT void eSSE_ORPS_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } - -emitterT void eSSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } -emitterT void eSSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//XORPS : Bitwise Logical XOR of Single-Precision FP Values * -//********************************************************************************** -emitterT void eSSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } -emitterT void eSSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } - -emitterT void eSSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } -emitterT void eSSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ADDPS : ADD Packed Single-Precision FP Values * -//********************************************************************************** -emitterT void eSSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } -emitterT void eSSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ADDSS : ADD Scalar Single-Precision FP Values * -//********************************************************************************** -emitterT void eSSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } -emitterT void eSSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f 
); } - -emitterT void eSSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } -emitterT void eSSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SUBPS: Packed Single-Precision FP Subtract * -//********************************************************************************** -emitterT void eSSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } -emitterT void eSSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SUBSS : Scalar Single-Precision FP Subtract * -//********************************************************************************** -emitterT void eSSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } -emitterT void eSSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } - -emitterT void eSSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } -emitterT void eSSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MULPS : Packed Single-Precision FP Multiply * -//********************************************************************************** -emitterT void eSSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } -emitterT void eSSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } - 
-//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MULSS : Scalar Single-Precision FP Multiply * -//********************************************************************************** -emitterT void eSSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } -emitterT void eSSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } - -emitterT void eSSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } -emitterT void eSSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } - -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//Packed Single-Precission FP compare (CMPccPS) * -//********************************************************************************** -//missing SSE_CMPPS_I8_to_XMM -// SSE_CMPPS_M32_to_XMM -// SSE_CMPPS_XMM_to_XMM -emitterT void eSSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); } -emitterT void eSSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); } -emitterT void eSSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); } -emitterT void eSSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); } -emitterT void eSSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); } -emitterT void eSSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); } -emitterT void eSSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); } -emitterT void eSSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); } -emitterT void eSSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); } -emitterT void 
eSSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); } -emitterT void eSSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); } -emitterT void eSSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); } -emitterT void eSSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); } -emitterT void eSSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); } -emitterT void eSSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); } -emitterT void eSSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); } - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//Scalar Single-Precission FP compare (CMPccSS) * -//********************************************************************************** -//missing SSE_CMPSS_I8_to_XMM -// SSE_CMPSS_M32_to_XMM -// SSE_CMPSS_XMM_to_XMM -emitterT void eSSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); } -emitterT void eSSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); } -emitterT void eSSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); } -emitterT void eSSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); } -emitterT void eSSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); } -emitterT void eSSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); } -emitterT void eSSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); } -emitterT void eSSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); } -emitterT void eSSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); } -emitterT void eSSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { 
CMPSSRtoR( 4 ); } -emitterT void eSSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); } -emitterT void eSSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); } -emitterT void eSSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); } -emitterT void eSSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); } -emitterT void eSSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); } -emitterT void eSSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } - -emitterT void eSSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 0 ); } -emitterT void eSSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); } -emitterT void eSSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 1 ); } -emitterT void eSSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); } -emitterT void eSSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 2 ); } -emitterT void eSSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); } -emitterT void eSSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 3 ); } -emitterT void eSSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); } -emitterT void eSSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 4 ); } -emitterT void eSSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); } -emitterT void eSSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 5 ); } -emitterT void eSSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); } -emitterT void eSSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 6 ); } -emitterT void eSSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); } -emitterT void eSSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, 
uptr from ) { CMPSDMtoR( 7 ); } -emitterT void eSSE2_CMPORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); } - -emitterT void eSSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) -{ - RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void eSSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); -} - -emitterT void eSSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) -{ - write8(0x66); - RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void eSSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * -//********************************************************************************** -emitterT void eSSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } -emitterT void eSSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal * -//********************************************************************************** -emitterT void eSSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } -emitterT void eSSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } - -//////////////////////////////////////////////////////////////////////////////////// 
-//**********************************************************************************/ -//SQRTPS : Packed Single-Precision FP Square Root * -//********************************************************************************** -emitterT void eSSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } -emitterT void eSSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SQRTSS : Scalar Single-Precision FP Square Root * -//********************************************************************************** -emitterT void eSSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } -emitterT void eSSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } - -emitterT void eSSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } -emitterT void eSSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MAXPS: Return Packed Single-Precision FP Maximum * -//********************************************************************************** -emitterT void eSSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } -emitterT void eSSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } - -emitterT void eSSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); } -emitterT void eSSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// 
-//**********************************************************************************/ -//MAXSS: Return Scalar Single-Precision FP Maximum * -//********************************************************************************** -emitterT void eSSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } -emitterT void eSSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } - -emitterT void eSSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } -emitterT void eSSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * -//********************************************************************************** -emitterT void eSSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } -emitterT void eSSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion * -//********************************************************************************** -emitterT void eSSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } -emitterT void eSSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } - -emitterT void eSSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } -emitterT void eSSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) -{ - write8(0xf3); - RexRB(0, to, from); - write16(0x2c0f); - ModRM(3, to, from); -} - -emitterT void 
eSSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } -emitterT void eSSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) -{ - write8(0xf3); - RexRB(0, to, from); - write16(0x2a0f); - ModRM(3, to, from); -} - -emitterT void eSSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } -emitterT void eSSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } - -emitterT void eSSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } -emitterT void eSSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * -//********************************************************************************** -emitterT void eSSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } -emitterT void eSSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } - -//**********************************************************************************/ -//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion * -//********************************************************************************** -emitterT void eSSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } -emitterT void eSSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } - -emitterT void eSSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MINPS: Return Packed Single-Precision 
FP Minimum * -//********************************************************************************** -emitterT void eSSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } -emitterT void eSSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } - -emitterT void eSSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } -emitterT void eSSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); } - -////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MINSS: Return Scalar Single-Precision FP Minimum * -//********************************************************************************** -emitterT void eSSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } -emitterT void eSSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } - -emitterT void eSSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } -emitterT void eSSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PMAXSW: Packed Signed Integer Word Maximum * -//********************************************************************************** -//missing - // SSE_PMAXSW_M64_to_MM -// SSE2_PMAXSW_M128_to_XMM -// SSE2_PMAXSW_XMM_to_XMM -emitterT void eSSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PMINSW: Packed Signed Integer Word Minimum * 
-//********************************************************************************** -//missing - // SSE_PMINSW_M64_to_MM -// SSE2_PMINSW_M128_to_XMM -// SSE2_PMINSW_XMM_to_XMM -emitterT void eSSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SHUFPS: Shuffle Packed Single-Precision FP Values * -//********************************************************************************** -emitterT void eSSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } - -emitterT void eSSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) -{ - RexRB(0, to, from); - write16(0xc60f); - WriteRmOffsetFrom(to, from, offset); - write8(imm8); -} - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SHUFPD: Shuffle Packed Double-Precision FP Values * -//********************************************************************************** -emitterT void eSSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } -emitterT void eSSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PSHUFD: Shuffle Packed DoubleWords * -//********************************************************************************** -emitterT void eSSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 
imm8 ) -{ - SSERtoR66( 0x700F ); - write8( imm8 ); -} -emitterT void eSSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } - -emitterT void eSSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } -emitterT void eSSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } -emitterT void eSSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } -emitterT void eSSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } - -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * -//********************************************************************************** -emitterT void eSSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } -emitterT void eSSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * -//********************************************************************************** -emitterT void eSSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } -emitterT void eSSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//DIVPS : Packed 
Single-Precision FP Divide * -//********************************************************************************** -emitterT void eSSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } -emitterT void eSSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//DIVSS : Scalar Single-Precision FP Divide * -//********************************************************************************** -emitterT void eSSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } -emitterT void eSSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } - -emitterT void eSSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } -emitterT void eSSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//STMXCSR : Store Streaming SIMD Extension Control/Status * -//********************************************************************************** -emitterT void eSSE_STMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//LDMXCSR : Load Streaming SIMD Extension Control/Status * -//********************************************************************************** -emitterT void eSSE_LDMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); -} - 
-///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PADDB,PADDW,PADDD : Add Packed Integers * -//********************************************************************************** -emitterT void eSSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } -emitterT void eSSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } -emitterT void eSSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } -emitterT void eSSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } -emitterT void eSSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } -emitterT void eSSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } -emitterT void eSSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } -emitterT void eSSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } - -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PCMPxx: Compare Packed Integers * -//********************************************************************************** -emitterT void eSSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } -emitterT void eSSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } -emitterT void eSSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } -emitterT void eSSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } -emitterT void eSSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } -emitterT void eSSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } -emitterT void 
eSSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } -emitterT void eSSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } -emitterT void eSSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } -emitterT void eSSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } -emitterT void eSSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } -emitterT void eSSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } - -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PEXTRW,PINSRW: Packed Extract/Insert Word * -//********************************************************************************** -emitterT void eSSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } -emitterT void eSSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } - -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PSUBx: Subtract Packed Integers * -//********************************************************************************** -emitterT void eSSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } -emitterT void eSSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } -emitterT void eSSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } -emitterT void eSSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } -emitterT void eSSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } -emitterT void eSSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr 
from ){ SSEMtoR66( 0xFA0F ); } -emitterT void eSSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } -emitterT void eSSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVD: Move Dword(32bit) to /from XMM reg * -//********************************************************************************** -emitterT void eSSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } -emitterT void eSSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } - -emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x6e0f ); - ModRM( 0, to, from); -} - -emitterT void eSSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x6e0f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void eSSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } - -emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - write8(0x66); - RexRB(0, from, to); - write16( 0x7e0f ); - ModRM( 0, from, to ); -} - -emitterT void eSSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - write8(0x66); - RexRB(0, from, to); - write16( 0x7e0f ); - WriteRmOffsetFrom(from, to, offset); -} - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//POR : SSE Bitwise OR * -//********************************************************************************** -emitterT void eSSE2_POR_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ) { SSERtoR66( 0xEB0F ); } -emitterT void eSSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } - -// logical and to &= from -emitterT void eSSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } -emitterT void eSSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } - -// to = (~to) & from -emitterT void eSSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } -emitterT void eSSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PXOR : SSE Bitwise XOR * -//********************************************************************************** -emitterT void eSSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); } -emitterT void eSSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } -/////////////////////////////////////////////////////////////////////////////////////// - -emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); } -emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); } -emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } - -emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( AlwaysUseMovaps ) - eSSE_MOVUPS_M128_to_XMM( to, from ); - else - { - write8(0xF3); - SSEMtoR(0x6F0F, 0); - } -} -emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) -{ - if( AlwaysUseMovaps ) - eSSE_MOVUPS_XMM_to_M128( to, from ); - else - { - 
write8(0xF3); - SSERtoM(0x7F0F, 0); - } -} - -// shift right logical - -emitterT void eSSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); } -emitterT void eSSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); } -emitterT void eSSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); } -emitterT void eSSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); } -emitterT void eSSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); } -emitterT void eSSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); } -emitterT void eSSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 2 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 3 , to ); - write8( imm8 ); -} - -// shift right arithmetic - -emitterT void eSSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } -emitterT void eSSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } -emitterT void eSSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } -emitterT void eSSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); } -emitterT void eSSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F 
); - ModRM( 3, 4 , to ); - write8( imm8 ); -} - -// shift left logical - -emitterT void eSSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); } -emitterT void eSSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); } -emitterT void eSSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); } -emitterT void eSSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); } -emitterT void eSSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); } -emitterT void eSSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); } -emitterT void eSSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 6 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 7 , to ); - write8( imm8 ); -} - -emitterT void eSSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } -emitterT void eSSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } - -emitterT void eSSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } -emitterT void eSSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } - -emitterT void eSSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } -emitterT void eSSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); } - -emitterT void eSSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { 
SSERtoR66( 0xDA0F ); } -emitterT void eSSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } - -emitterT void eSSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEC0F ); } -emitterT void eSSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEC0F ); } - -emitterT void eSSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xED0F ); } -emitterT void eSSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xED0F ); } - -emitterT void eSSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE80F ); } -emitterT void eSSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE80F ); } - -emitterT void eSSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE90F ); } -emitterT void eSSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE90F ); } - -emitterT void eSSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } -emitterT void eSSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } -emitterT void eSSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } -emitterT void eSSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } - -emitterT void eSSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } -emitterT void eSSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } -emitterT void eSSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } -emitterT void eSSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word -//********************************************************************************** -emitterT void 
eSSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } -emitterT void eSSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } -emitterT void eSSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } -emitterT void eSSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } - -emitterT void eSSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } -emitterT void eSSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } - -//**********************************************************************************/ -//PUNPCKHWD: Unpack 16bit high -//********************************************************************************** -emitterT void eSSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } -emitterT void eSSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } - -emitterT void eSSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } -emitterT void eSSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } - -emitterT void eSSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } -emitterT void eSSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } -emitterT void eSSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } -emitterT void eSSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } - -emitterT void eSSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } -emitterT void eSSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } -emitterT void eSSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } -emitterT void eSSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 
0x6A0F ); } - -emitterT void eSSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } -emitterT void eSSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } - -emitterT void eSSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } -emitterT void eSSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } - -emitterT void eSSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } -emitterT void eSSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } -emitterT void eSSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } -emitterT void eSSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } - -emitterT void eSSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } -emitterT void eSSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } - -emitterT void eSSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } - -emitterT void eSSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } -emitterT void eSSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } - -emitterT void eSSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } - -emitterT void eSSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } -emitterT void eSSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } - -emitterT void eSSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0xf3); - RexRB(0, to, from); - write16( 0x120f); - ModRM( 3, to, from ); -} - -emitterT void eSSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } -emitterT void eSSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } -emitterT void eSSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } - -// SSSE3 - -emitterT void eSSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1C380F); - ModRM(3, to, from); -} - -emitterT void eSSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1D380F); - ModRM(3, to, from); -} - -emitterT void eSSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1E380F); - ModRM(3, to, from); -} - -emitterT void eSSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0F3A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void eSSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x08380F); - ModRM(3, to, from); -} - -emitterT void eSSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x09380F); - ModRM(3, to, from); -} - -emitterT void eSSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0A380F); - ModRM(3, to, from); -} - -// SSE4.1 - -emitterT void eSSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - write24(0x403A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void eSSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) -{ - write8(0x66); - write24(0x403A0F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); - write8(imm8); -} - -emitterT void eSSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x213A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void eSSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, 
x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x173A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void eSSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0C3A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void eSSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x14380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x14380F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); -} - -emitterT void eSSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x25380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x35380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x223A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void eSSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3D380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x39380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3F380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3B380F); - ModRM(3, to, from); -} - -emitterT void eSSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3D380F); - 
ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void eSSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x39380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void eSSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3F380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void eSSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3B380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void eSSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x28380F); - ModRM(3, to, from); -} diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index 9caa04c6a2..b198c336b5 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -22,164 +22,30 @@ // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) // This header should always be included *after* ix86.h. -#ifndef _ix86_included_ -#error Dependency fail: Please define _EmitterId_ and include ix86.h first. -#endif - // Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the // overhead of dynarec instructions that use these. 
-static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from); - else SSE_MOVAPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from); - else SSE_MOVAPS_XMM_to_M128(to, from); -} - -static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from); - else SSE_MOVAPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset); - else SSE_MOVAPSRmtoROffset(to, from, offset); -} - -static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset); - else SSE_MOVAPSRtoRmOffset(to, from, offset); -} - -static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from); - else SSE_MOVUPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from); - else SSE_MOVUPS_XMM_to_M128(to, from); -} - -static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from); - else SSE_MOVSS_M32_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from); - else 
SSE_MOVSS_XMM_to_M32(to, from); -} - -static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from); - else SSE_MOVSS_XMM_to_Rm(to, from); -} - -static __forceinline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset); - else SSE_MOVSS_RmOffset_to_XMM(to, from, offset); -} - -static __forceinline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset); - else SSE_MOVSS_XMM_to_RmOffset(to, from, offset); -} - -static __forceinline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from); - else SSE_ORPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from); - else SSE_ORPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from); - else SSE_XORPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from); - else SSE_XORPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from); - else SSE_ANDPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from); - else SSE_ANDPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr 
from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from); - else SSE_ANDNPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from); - else SSE_ANDNPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); - else SSE_UNPCKLPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); - else SSE_UNPCKLPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); - else SSE_UNPCKHPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); - else SSE_UNPCKHPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) { - SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); - if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); - } - else { - SSE_MOVHLPS_XMM_to_XMM(to, from); - } -} +extern void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ); +extern void 
SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index 5ef532ffcd..3193a4ffb6 100644 --- a/pcsx2/x86/ix86/ix86_tools.cpp +++ b/pcsx2/x86/ix86/ix86_tools.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "ix86/ix86.h" // used to make sure regs don't get changed while in recompiler @@ -27,8 +27,8 @@ u8 g_globalMMXSaved = 0; u8 g_globalXMMSaved = 0; -PCSX2_ALIGNED16( static u64 g_globalMMXData[8] ); -PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] ); +PCSX2_ALIGNED16( u64 g_globalMMXData[8] ); +PCSX2_ALIGNED16( u64 g_globalXMMData[2*XMMREGS] ); ///////////////////////////////////////////////////////////////////// diff --git 
a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index e4fb71d84d..ede42f92af 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -29,6 +29,7 @@ // general types typedef int x86IntRegType; + #define EAX 0 #define EBX 3 #define ECX 1 @@ -149,3 +150,252 @@ struct CPUINFO{ extern CPUINFO cpuinfo; //------------------------------------------------------------------ + +// templated version of is_s8 is required, so that u16's get correct sign extension treatment. +template< typename T > +static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } + +namespace x86Emitter +{ + class x86ModRm; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + struct x86Register32 + { + static const x86Register32 Empty; // defined as an empty/unused value (-1) + + int Id; + + x86Register32( const x86Register32& src ) : Id( src.Id ) {} + x86Register32() : Id( -1 ) {} + explicit x86Register32( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register32& src ) const { return Id == src.Id; } + bool operator!=( const x86Register32& src ) const { return Id != src.Id; } + + x86ModRm operator+( const x86Register32& right ) const; + x86ModRm operator+( const x86ModRm& right ) const; + x86ModRm operator+( s32 right ) const; + + x86ModRm operator*( u32 factor ) const; + + x86Register32& operator=( const x86Register32& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Similar to x86Register, but without the ability to add/combine them with ModSib. 
+ // + class x86Register16 + { + public: + static const x86Register16 Empty; + + int Id; + + x86Register16( const x86Register16& src ) : Id( src.Id ) {} + x86Register16() : Id( -1 ) {} + explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register16& src ) const { return Id == src.Id; } + bool operator!=( const x86Register16& src ) const { return Id != src.Id; } + + x86Register16& operator=( const x86Register16& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Similar to x86Register, but without the ability to add/combine them with ModSib. + // + class x86Register8 + { + public: + static const x86Register8 Empty; + + int Id; + + x86Register8( const x86Register16& src ) : Id( src.Id ) {} + x86Register8() : Id( -1 ) {} + explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register8& src ) const { return Id == src.Id; } + bool operator!=( const x86Register8& src ) const { return Id != src.Id; } + + x86Register8& operator=( const x86Register8& src ) + { + Id = src.Id; + return *this; + } + }; + + // Use 32 bit registers as out index register (for ModSig memory address calculations) + typedef x86Register32 x86IndexReg; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + class x86ModRm + { + public: + x86IndexReg Base; // base register (no scale) + x86IndexReg Index; // index reg gets multiplied by the scale + int Factor; // scale applied to the index register, in factor form (not a shift!) 
+ s32 Displacement; // address displacement + + public: + x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) : + Base( base ), + Index( index ), + Factor( factor ), + Displacement( displacement ) + { + } + + explicit x86ModRm( x86IndexReg base, int displacement=0 ) : + Base( base ), + Index(), + Factor(0), + Displacement( displacement ) + { + } + + explicit x86ModRm( s32 displacement ) : + Base(), + Index(), + Factor(0), + Displacement( displacement ) + { + } + + static x86ModRm FromIndexReg( x86IndexReg index, int scale=0, s32 displacement=0 ); + + public: + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + x86IndexReg GetEitherReg() const; + + x86ModRm& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + x86ModRm& Add( const x86IndexReg& src ); + x86ModRm& Add( const x86ModRm& src ); + + x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } + x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // ModSib - Internal low-level representation of the ModRM/SIB information. + // + // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that + // the Base, Index, Scale, and Displacement values are all valid, and it serves as a type- + // safe layer between the x86Register's operators (which generate x86ModRm types) and the + // emitter's ModSib instruction forms. Without this, the x86Register would pass as a + // ModSib type implicitly, and that would cause ambiguity on a number of instructions. 
+ // + class ModSib + { + public: + x86IndexReg Base; // base register (no scale) + x86IndexReg Index; // index reg gets multiplied by the scale + int Scale; // scale applied to the index register, in scale/shift form + s32 Displacement; // offset applied to the Base/Index registers. + + explicit ModSib( const x86ModRm& src ); + explicit ModSib( s32 disp ); + ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); + + x86IndexReg GetEitherReg() const; + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + + ModSib& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); } + ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); } + + protected: + void Reduce(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // x86IndexerType - This is a static class which provisions our ptr[] syntax. + // + struct x86IndexerType + { + // passthrough instruction, allows ModSib to pass silently through ptr translation + // without doing anything and without compiler error. 
+ const ModSib& operator[]( const ModSib& src ) const { return src; } + + ModSib operator[]( x86IndexReg src ) const + { + return ModSib( src, x86IndexReg::Empty ); + } + + ModSib operator[]( const x86ModRm& src ) const + { + return ModSib( src ); + } + + ModSib operator[]( uptr src ) const + { + return ModSib( src ); + } + + ModSib operator[]( void* src ) const + { + return ModSib( (uptr)src ); + } + + x86IndexerType() {} + }; + + // ------------------------------------------------------------------------ + extern const x86IndexerType ptr; + + extern const x86Register32 eax; + extern const x86Register32 ebx; + extern const x86Register32 ecx; + extern const x86Register32 edx; + extern const x86Register32 esi; + extern const x86Register32 edi; + extern const x86Register32 ebp; + extern const x86Register32 esp; + + extern const x86Register16 ax; + extern const x86Register16 bx; + extern const x86Register16 cx; + extern const x86Register16 dx; + extern const x86Register16 si; + extern const x86Register16 di; + extern const x86Register16 bp; + extern const x86Register16 sp; + + extern const x86Register8 al; + extern const x86Register8 cl; + extern const x86Register8 dl; + extern const x86Register8 bl; + extern const x86Register8 ah; + extern const x86Register8 ch; + extern const x86Register8 dh; + extern const x86Register8 bh; +} \ No newline at end of file diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 4613a13acb..d54d8367ab 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -41,8 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->regs = vuRegsPtr; mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); - mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8; - mVU->cacheAddr = 0xC0000000 + (vuIndex ? mVU->cacheSize : 0); + mVU->progSize = (vuIndex ? 
0x4000 : 0x1000) / 4; mVU->cache = NULL; mVUreset(); @@ -55,22 +54,36 @@ microVUt(void) mVUreset() { mVUclose(); // Close // Create Block Managers - for (int i; i <= mVU->prog.max; i++) { - for (u32 j; j < (mVU->progSize / 2); j++) { + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { mVU->prog.prog[i].block[j] = new microBlockManager(); } } // Dynarec Cache - mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0x10000000, (vuIndex ? "Micro VU1" : "Micro VU0")); - if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); + mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); + mVU->ptr = mVU->cache; - // Other Variables + // Setup Entrance/Exit Points + mVUdispatcherA(); + mVUdispatcherB(); + + // Program Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; mVU->prog.cleared = 1; mVU->prog.cur = -1; mVU->prog.total = -1; + + // Setup Dynarec Cache Limits for Each Program + u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes + for (int i = 0; i <= mVU->prog.max; i++) { + mVU->prog.prog[i].x86start = z; + mVU->prog.prog[i].x86ptr = z; + z += (mVU->cacheSize / (mVU->prog.max + 1)); + mVU->prog.prog[i].x86end = z; + } } // Free Allocated Resources @@ -81,8 +94,8 @@ microVUt(void) mVUclose() { if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; } // Delete Block Managers - for (int i; i <= mVU->prog.max; i++) { - for (u32 j; j < (mVU->progSize / 2); j++) { + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j]; } } @@ 
-99,33 +112,6 @@ microVUt(void) mVUclear(u32 addr, u32 size) { // that its probably not worth it... } -// Executes for number of cycles -microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { -/* - Pseudocode: (ToDo: implement # of cycles) - 1) Search for existing program - 2) If program not found, goto 5 - 3) Search for recompiled block - 4) If recompiled block found, goto 6 - 5) Recompile as much blocks as possible - 6) Return start execution address of block -*/ - microVU* mVU = mVUx; - if ( mVUsearchProg(mVU) ) { // Found Program - //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); - //if (block) return block->x86ptrStart; // Found Block - } - // Recompile code - return NULL; -} - -void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { - return mVUexecute<0>(startPC, cycles); -} -void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { - return mVUexecute<1>(startPC, cycles); -} - //------------------------------------------------------------------ // Micro VU - Private Functions //------------------------------------------------------------------ @@ -133,6 +119,7 @@ void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { // Clears program data (Sets used to 1 because calling this function implies the program will be used at least once) __forceinline void mVUclearProg(microVU* mVU, int progIndex) { mVU->prog.prog[progIndex].used = 1; + mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; for (u32 i = 0; i < (mVU->progSize / 2); i++) { mVU->prog.prog[progIndex].block[i]->reset(); } @@ -171,7 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { - if (i == mVU->prog.cur) SysPrintf("microVU: Same micro program sent!\n"); + if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; mVU->prog.cleared = 0; mVU->prog.prog[i].used++; @@ -206,98 +193,31 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { } } -//------------------------------------------------------------------ -// Dispatcher Functions -//------------------------------------------------------------------ - -#ifdef _MSC_VER -// Runs VU0 for number of cycles -__declspec(naked) void __fastcall startVU0(u32 startPC, u32 cycles) { - __asm { - // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. - call mVUexecuteVU0 - - /*backup cpu state*/ - push ebx; - push ebp; - push esi; - push edi; - - ldmxcsr g_sseVUMXCSR - /* Should set xmmZ? 
*/ - jmp eax - } -} - -// Runs VU1 for number of cycles -__declspec(naked) void __fastcall startVU1(u32 startPC, u32 cycles) { - __asm { - - call mVUexecuteVU1 - - /*backup cpu state*/ - push ebx; - push ebp; - push esi; - push edi; - - ldmxcsr g_sseVUMXCSR - - jmp eax - } -} - -// Exit point -__declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) { - __asm { - - //call mVUcleanUpVU0 - - /*restore cpu state*/ - pop edi; - pop esi; - pop ebp; - pop ebx; - - ldmxcsr g_sseMXCSR - emms - - ret - } -} -#else -extern "C" { - extern void __fastcall startVU0(u32 startPC, u32 cycles); - extern void __fastcall startVU1(u32 startPC, u32 cycles); - extern void __fastcall endVU0(u32 startPC, u32 cycles); -} -#endif - //------------------------------------------------------------------ // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ -__forceinline void initVUrec(VURegs* vuRegs, const int vuIndex) { +void initVUrec(VURegs* vuRegs, const int vuIndex) { if (!vuIndex) mVUinit<0>(vuRegs); else mVUinit<1>(vuRegs); } -__forceinline void closeVUrec(const int vuIndex) { +void closeVUrec(const int vuIndex) { if (!vuIndex) mVUclose<0>(); else mVUclose<1>(); } -__forceinline void resetVUrec(const int vuIndex) { +void resetVUrec(const int vuIndex) { if (!vuIndex) mVUreset<0>(); else mVUreset<1>(); } -__forceinline void clearVUrec(u32 addr, u32 size, const int vuIndex) { +void clearVUrec(u32 addr, u32 size, const int vuIndex) { if (!vuIndex) mVUclear<0>(addr, size); else mVUclear<1>(addr, size); } -__forceinline void runVUrec(u32 startPC, u32 cycles, const int vuIndex) { +void runVUrec(u32 startPC, u32 cycles, const int vuIndex) { if (!vuIndex) startVU0(startPC, cycles); else startVU1(startPC, cycles); } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index c37a084043..81294fbe28 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -18,7 +18,6 @@ #pragma once #define mVUdebug // Prints 
Extra Info to Console -#define _EmitterId_ (vuIndex+1) #include "Common.h" #include "VU.h" #include "GS.h" @@ -92,9 +91,12 @@ public: template struct microProgram { - u32 data[progSize]; - u32 used; // Number of times its been used - microBlockManager* block[progSize / 2]; + u32 data[progSize/4]; + u32 used; // Number of times its been used + u8* x86ptr; // Pointer to program's recompilation code + u8* x86start; // Start of program's rec-cache + u8* x86end; // Limit of program's rec-cache + microBlockManager* block[progSize/8]; microAllocInfo allocInfo; }; @@ -113,30 +115,24 @@ struct microProgManager { struct microVU { u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size - u32 progSize; // VU Micro Program Size (microSize/8) - u32 cacheAddr; // VU Cache Start Address + u32 progSize; // VU Micro Program Size (microSize/4) static const u32 cacheSize = 0x500000; // VU Cache Size - microProgManager<0x1000> prog; // Micro Program Data + microProgManager<0x4000> prog; // Micro Program Data VURegs* regs; // VU Regs Struct u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) + u8* startFunct; // Ptr Function to the Start code for recompiled programs + u8* exitFunct; // Ptr Function to the Exit code for recompiled programs u8* ptr; // Pointer to next place to write recompiled code to u32 code; // Contains the current Instruction u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) u32 divFlag; // 1 instance of I/D flags - -/* - uptr x86eax; // Accumulator register. Used in arithmetic operations. - uptr x86ecx; // Counter register. Used in shift/rotate instructions. - uptr x86edx; // Data register. Used in arithmetic operations and I/O operations. - uptr x86ebx; // Base register. Used as a pointer to data (located in DS in segmented mode). - uptr x86esp; // Stack Pointer register. Pointer to the top of the stack. 
- uptr x86ebp; // Stack Base Pointer register. Used to point to the base of the stack. - uptr x86esi; // Source register. Used as a pointer to a source in stream operations. - uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations. -*/ + u32 VIbackup[2]; // Holds a backup of a VI reg if modified before a branch + u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) + u32 p; // Holds current P instance index + u32 q; // Holds current Q instance index }; // microVU rec structs @@ -147,14 +143,24 @@ extern PCSX2_ALIGNED16(microVU microVU1); extern void (*mVU_UPPER_OPCODE[64])( VURegs* VU, s32 info ); extern void (*mVU_LOWER_OPCODE[128])( VURegs* VU, s32 info ); +// Main Functions +microVUt(void) mVUinit(VURegs*); +microVUt(void) mVUreset(); +microVUt(void) mVUclose(); +microVUt(void) mVUclear(u32, u32); + +// Private Functions __forceinline void mVUclearProg(microVU* mVU, int progIndex); __forceinline int mVUfindLeastUsedProg(microVU* mVU); __forceinline int mVUsearchProg(microVU* mVU); __forceinline void mVUcacheProg(microVU* mVU, int progIndex); +void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); +void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); -#ifdef __LINUX__ -microVUt(void) mVUreset(); -microVUt(void) mVUclose(); +#ifndef __LINUX__ +typedef void (__fastcall *mVUrecCall)(u32, u32); +#else +typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if this is correct syntax (should be close xD) #endif // Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files) @@ -163,3 +169,4 @@ microVUt(void) mVUclose(); #include "microVU_Alloc.inl" #include "microVU_Tables.inl" #include "microVU_Compile.inl" +#include "microVU_Execute.inl" diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index c539eb1297..cb6bbbbb5f 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -33,6 +33,8 @@ struct microRegInfo { 
u8 VI[32]; u8 q; u8 p; + u8 r; + u8 xgkick; }; struct microTempRegInfo { @@ -42,40 +44,19 @@ struct microTempRegInfo { u8 VIreg; // Index of the VI reg u8 q; // Holds cycle info for Q reg u8 p; // Holds cycle info for P reg + u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified) + u8 xgkick; // Holds the cycle info for XGkick }; template struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) - u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) - u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag - u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time. + u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block + u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC - u32 info[pSize]; // bit 00 = Lower Instruction is NOP - // bit 01 - // bit 02 - // bit 03 - // bit 04 - // bit 05 = Write to Q1 or Q2? - // bit 06 = Read Q1 or Q2? - // bit 07 = Read/Write to P1 or P2? - // bit 08 = Update Mac Flags? - // bit 09 = Update Status Flags? 
- // bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance - // bit 11 - // bit 12 = Used with bit 13 to make a 2-bit key for status flag instance - // bit 13 - // bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance - // bit 15 - // bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance - // bit 17 - // bit 18 = Used with bit 19 to make a 2-bit key for status flag instance - // bit 19 - // bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance - // bit 21 - // bit 22 = Read VI(Fs) from backup memory? - // bit 23 = Read VI(Ft) from backup memory? + u32 startPC; // Start PC for Cur Block + u32 info[pSize/8]; // Info for Instructions in current block }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index ca6cee9e09..dee76e8f92 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) { // FMAC6 - Normal FMAC Opcodes (I Reg) //------------------------------------------------------------------ -#define getIreg(reg) { \ +#define getIreg(reg, modXYZW) { \ MOV32ItoR(gprT1, mVU->iReg); \ SSE2_MOVD_R_to_XMM(reg, gprT1); \ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 8); \ - if (!_XYZW_SS) { mVUunpack_xyzw(reg, reg, 0); } \ + if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \ } microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { @@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { Fs = xmmFs; Ft = xmmFt; Fd = xmmFs; - getIreg(Ft); + getIreg(Ft, 1); getReg6(Fs, _Fs_); } @@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) { ACC = xmmACC; Fs = (_X_Y_Z_W == 15) ? 
xmmACC : xmmFs; Ft = xmmFt; - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmFs; ACC = xmmACC; - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) { Fd = xmmT1; ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) { ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1; SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -708,19 +708,7 @@ microVUt(void) mVUallocCFLAGb(int reg, int fInstance) { microVU* mVU = mVUx; MOV32RtoM(mVU->clipFlag[fInstance], reg); } -/* -microVUt(void) mVUallocDFLAGa(int reg) { - microVU* mVU = mVUx; - //if (!mVUdivFlag) { MOV32MtoR(reg, (uptr)&mVU->divFlag[readQ]); AND32ItoR(reg, 0xc00); } - //else if (mVUdivFlag & 1) { XOR32RtoR(reg, reg); } - //else { MOV32ItoR(reg, (u32)((mVUdivFlag << 9) & 0xc00)); } -} -microVUt(void) mVUallocDFLAGb(int reg) { - microVU* mVU = mVUx; - //MOV32RtoM((uptr)&mVU->divFlag[writeQ], reg); -} -*/ //------------------------------------------------------------------ // VI Reg Allocators //------------------------------------------------------------------ @@ -734,6 +722,12 @@ microVUt(void) mVUallocVIa(int GPRreg, int _reg_) { microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { microVU* mVU = mVUx; + if (backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) + 
MOV32RtoM((uptr)&mVU->VIbackup[1], GPRreg); + mVUallocVIa(GPRreg, _reg_); + MOV32RtoM((uptr)&mVU->VIbackup[0], GPRreg); + MOV32MtoR(GPRreg, (uptr)&mVU->VIbackup[1]); + } if (_reg_ == 0) { return; } else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); } else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index f35299ab28..224ebbbd09 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -102,6 +102,49 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { analyzeReg4(Ft); } +//------------------------------------------------------------------ +// IALU - IALU Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } + +microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { + microVU* mVU = mVUx; + if (!Id) { mVUinfo |= _isNOP; } + analyzeVIreg1(Is); + analyzeVIreg1(It); + analyzeVIreg2(Id, 1); +} + +microVUt(void) mVUanalyzeIALU2(int Is, int It) { + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; } + analyzeVIreg1(Is); + analyzeVIreg2(It, 1); +} + +//------------------------------------------------------------------ +// MR32 - MR32 Opcode +//------------------------------------------------------------------ + +// Flips xyzw stalls to yzwx +#define analyzeReg6(reg) { \ + if (reg) { \ + if (_X) { mVUstall = aMax(mVUstall, aReg(reg).y); } \ + if (_Y) { mVUstall = aMax(mVUstall, aReg(reg).z); } \ + if (_Z) { mVUstall = aMax(mVUstall, aReg(reg).w); } \ + if (_W) { mVUstall = aMax(mVUstall, aReg(reg).x); } \ + } \ +} + +microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { + microVU* mVU = mVUx; + if (!Ft) { mVUinfo |= _isNOP; } + analyzeReg6(Fs); + analyzeReg2(Ft); +} + 
//------------------------------------------------------------------ // FDIV - DIV/SQRT/RSQRT Opcodes //------------------------------------------------------------------ @@ -143,4 +186,120 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } +//------------------------------------------------------------------ +// MFP - MFP Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeMFP(int Ft) { + microVU* mVU = mVUx; // ToDo: Needs special info for P reg? + if (!Ft) { mVUinfo |= _isNOP; } + analyzeReg2(Ft); +} + +//------------------------------------------------------------------ +// LQx - LQ/LQD/LQI Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { + microVU* mVU = mVUx; + analyzeVIreg1(Is); + analyzeReg2(Ft); + if (!Ft) { mVUinfo |= (writeIs && Is) ? _noWriteVF : _isNOP; } + if (writeIs) { analyzeVIreg2(Is, 1); } +} + +//------------------------------------------------------------------ +// SQx - SQ/SQD/SQI Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { + microVU* mVU = mVUx; + analyzeReg1(Fs); + analyzeVIreg1(It); + if (writeIt) { analyzeVIreg2(It, 1); } +} + +//------------------------------------------------------------------ +// R*** - R Reg Opcodes +//------------------------------------------------------------------ + +#define analyzeRreg() { mVUregsTemp.r = 1; } + +microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { + microVU* mVU = mVUx; + analyzeReg5(Fs, Fsf); + analyzeRreg(); +} + +microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { + microVU* mVU = mVUx; + if (!Ft) { mVUinfo |= ((canBeNOP) ? 
_isNOP : _noWriteVF); } + analyzeReg2(Ft); + analyzeRreg(); +} + +//------------------------------------------------------------------ +// Sflag - Status Flag Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeSflag(int It) { + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; } + else { mVUinfo |= _isSflag | _swapOps; } // ToDo: set s flag at right time + analyzeVIreg2(It, 1); +} + +microVUt(void) mVUanalyzeFSSET() { + microVU* mVU = mVUx; + int i, curPC = iPC; + for (i = mVUcount; i > 0; i--) { + incPC2(-2); + if (isSflag) break; + mVUinfo &= ~_doStatus; + } + iPC = curPC; +} + +//------------------------------------------------------------------ +// XGkick +//------------------------------------------------------------------ + +#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); } +#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; } + +microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { + microVU* mVU = mVUx; + analyzeVIreg1(Fs); + analyzeXGkick1(); + analyzeXGkick2(xCycles); +} + +//------------------------------------------------------------------ +// Branches - Branch Opcodes +//------------------------------------------------------------------ + +#define analyzeBranchVI(reg, infoVal) { \ + if (reg && (mVUcount > 0)) { /* Ensures branch is not first opcode in block */ \ + incPC(-2); \ + if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \ + mVUinfo |= _backupVI; \ + incPC(2); \ + mVUinfo |= infoVal; \ + } \ + else { incPC(2); } \ + } \ +} + +microVUt(void) mVUanalyzeBranch1(int Is) { + microVU* mVU = mVUx; + if (mVUregs.VI[Is]) { analyzeVIreg1(Is); } + else { analyzeBranchVI(Is, _memReadIs); } +} + +microVUt(void) mVUanalyzeBranch2(int Is, int It) { + microVU* mVU = mVUx; + if (mVUregs.VI[Is] || mVUregs.VI[It]) { analyzeVIreg1(Is); analyzeVIreg1(It); } + else { analyzeBranchVI(Is, _memReadIs); analyzeBranchVI(It, _memReadIt);} +} + #endif 
//PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 49c517de5e..85c4938aa7 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -19,18 +19,6 @@ #pragma once #ifdef PCSX2_MICROVU -#ifdef mVUdebug -#define mVUdebugStuff1() { \ - if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ - if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ - if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ - if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ - if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ -} -#else -#define mVUdebugStuff1() {} -#endif - #define createBlock(blockEndPtr) { \ block.pipelineState = pipelineState; \ block.x86ptrStart = x86ptrStart; \ @@ -41,37 +29,89 @@ } \ } -#define curI mVUcurProg.data[iPC] -#define setCode() { mVU->code = curI; } -#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } -#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } +#define branchCase(Xcmp) \ + CMP16ItoM((uptr)mVU->branch, 0); \ + ajmp = Xcmp((uptr)0); \ + break + +#define branchCase2() { \ + incPC(-2); \ + MOV32ItoR(gprT1, (xPC + (2 * 8)) & ((vuIndex) ? 0x3fff:0xfff)); \ + mVUallocVIb(gprT1, _Ft_); \ + incPC(+2); \ +} + +#define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } +#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } +#define incP() { mVU->p = (mVU->p+1) & 1; } +#define incQ() { mVU->q = (mVU->q+1) & 1; } + +microVUt(void) mVUincCycles(int x) { + microVU* mVU = mVUx; + mVUcycles += x; + for (int z = 31; z > 0; z--) { + calcCycles(mVUregs.VF[z].x, x); + calcCycles(mVUregs.VF[z].y, x); + calcCycles(mVUregs.VF[z].z, x); + calcCycles(mVUregs.VF[z].w, x); + } + for (int z = 16; z > 0; z--) { + calcCycles(mVUregs.VI[z], x); + } + if (mVUregs.q) { + calcCycles(mVUregs.q, x); + if (!mVUregs.q) { incQ(); } // Do Status Flag Merging Stuff? 
+ } + if (mVUregs.p) { + calcCycles(mVUregs.p, x); + if (!mVUregs.p) { incP(); } + } + calcCycles(mVUregs.r, x); + calcCycles(mVUregs.xgkick, x); +} microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); + if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg + mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP + mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector + mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); + mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z); + mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w); + } mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg; - mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? (aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg); + mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg; mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI; mVUregs.q = mVUregsTemp.q; mVUregs.p = mVUregsTemp.p; + mVUregs.r = mVUregsTemp.r; + mVUregs.xgkick = mVUregsTemp.xgkick; } -microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { +//------------------------------------------------------------------ +// Recompiler +//------------------------------------------------------------------ + +microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { microVU* mVU = mVUx; microBlock block; - int branch; + u8* thisPtr = mVUcurProg.x86Ptr; iPC = startPC / 4; - + // Searches for Existing Compiled Block (if found, then returns; else, compile) - microBlock* pblock = mVUblock[iPC]->search(pipelineState, pState); - if (block) { x86SetPtr(pblock->x86ptrEnd); return; } + microBlock* pblock = 
mVUblock[iPC/2]->search(pipelineState, pState); + if (block) { return pblock->x86ptrStart; } // First Pass setCode(); - branch = 0; - mVUbranch = 0; - mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage - for (;;) { + mVUbranch = 0; + mVUstartPC = iPC; + mVUcount = 0; + mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage + mVU->p = 0; // All blocks start at p index #0 + mVU->q = 0; // All blocks start at q index #0 + for (int branch = 0;; ) { startLoop(); mVUopU(); if (curI & _Ebit_) { branch = 1; } @@ -79,34 +119,70 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, if (curI & _Ibit_) { incPC(1); mVUinfo |= _isNOP; } else { incPC(1); mVUopL(); } mVUsetCycles(); + if (mVU->p) { mVUinfo |= _readP; } + if (mVU->q) { mVUinfo |= _readQ; } + else { mVUinfo |= _writeQ; } if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; } else if (branch == 1) { branch = 2; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } incPC(1); + incCycles(1); + mVUcount++; } // Second Pass - iPC = startPC; + iPC = mVUstartPC; setCode(); for (bool x = 1; x; ) { + // + // ToDo: status/mac flag stuff? 
+ // if (isEOB) { x = 0; } - else if (isBranch) { mVUopU(); incPC(2); } + //if (isBranch2) { mVUopU(); incPC(2); } - mVUopU(); - if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } - else { incPC(1); mVUopL(); } + if (isNop) { mVUopU(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } + else if (!swapOps) { mVUopU(); incPC(1); mVUopL(); } + else { incPC(1); mVUopL(); incPC(-1); mVUopU(); incPC(1); } + if (!isBdelay) { incPC(1); } - else { - incPC(-2); // Go back to Branch Opcode - mVUopL(); // Run Branch Opcode + else { + u32* ajmp; switch (mVUbranch) { - case 1: break; - case 2: break; - case 3: break; + case 3: branchCase(JZ32); // IBEQ + case 4: branchCase(JGE32); // IBGEZ + case 5: branchCase(JG32); // IBGTZ + case 6: branchCase(JLE32); // IBLEQ + case 7: branchCase(JL32); // IBLTZ + case 8: branchCase(JNZ32); // IBNEQ + case 2: branchCase2(); // BAL + case 1: + // search for block + ajmp = JMP32((uptr)0); + + break; // B/BAL + case 9: branchCase2(); // JALR + case 10: break; // JR/JALR + //mVUcurProg.x86Ptr } - break; + return thisPtr; } } + // Do E-bit end stuff here + + incCycles(55); // Ensures Valid P/Q instances + mVUcycles -= 55; + if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + + MOV32ItoM((uptr)&mVU->p, mVU->p); + MOV32ItoM((uptr)&mVU->q, mVU->q); + AND32ItoM((uptr)µVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag + AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Not sure what this does but zerorecs do it... 
+ MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); + JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); + return thisPtr; } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl new file mode 100644 index 0000000000..a1a875e39a --- /dev/null +++ b/pcsx2/x86/microVU_Execute.inl @@ -0,0 +1,164 @@ +/* Pcsx2 - Pc Ps2 Emulator +* Copyright (C) 2009 Pcsx2-Playground Team +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ +#pragma once +#ifdef PCSX2_MICROVU + +//------------------------------------------------------------------ +// Dispatcher Functions +//------------------------------------------------------------------ + +// Generates the code for entering recompiled blocks +microVUt(void) mVUdispatcherA() { + static u32 PCSX2_ALIGNED16(vuMXCSR); + microVU* mVU = mVUx; + x86SetPtr(mVU->ptr); + mVU->startFunct = mVU->ptr; + + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. 
+ if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } + else { CALLFunc((uptr)mVUexecuteVU1); } + + // Backup cpu state + PUSH32R(EBX); + PUSH32R(EBP); + PUSH32R(ESI); + PUSH32R(EDI); + + // Load VU's MXCSR state + vuMXCSR = g_sseVUMXCSR; + SSE_LDMXCSR((uptr)&vuMXCSR); + + // Load Regs + MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R]); + MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG]); + MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG]); + SHL32ItoR(gprF0, 16); + AND32ItoR(gprF1, 0xffff); + OR32RtoR (gprF0, gprF1); + MOV32RtoR(gprF1, gprF0); + MOV32RtoR(gprF2, gprF0); + MOV32RtoR(gprF3, gprF0); + + for (int i = 0; i < 8; i++) { + MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1]); + } + + SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC); + SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); + SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]); + SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ + + // Jump to Recompiled Code Block + JMPR(EAX); + mVU->ptr = x86Ptr; +} + +// Generates the code to exit from recompiled blocks +microVUt(void) mVUdispatcherB() { + static u32 PCSX2_ALIGNED16(eeMXCSR); + microVU* mVU = mVUx; + x86SetPtr(mVU->ptr); + mVU->exitFunct = mVU->ptr; + + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. 
+ if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } + else { CALLFunc((uptr)mVUcleanUpVU1); } + + // Load EE's MXCSR state + eeMXCSR = g_sseMXCSR; + SSE_LDMXCSR((uptr)&eeMXCSR); + + // Save Regs + MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances + AND32ItoR(gprT1, 0xffff); + SHR32ItoR(gprF0, 16); + MOV32RtoM((uptr)&mVU->regs->VI[REG_R], gprR); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG], gprT1); + MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG], gprF0); + + for (int i = 0; i < 8; i++) { + MOVDMMXtoM((uptr)&mVU->regs->VI[i+1], i); + } + + SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC); + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances + //SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + + // Restore cpu state + POP32R(EDI); + POP32R(ESI); + POP32R(EBP); + POP32R(EBX); + + EMMS(); + RET(); + + mVU->ptr = x86Ptr; + mVUcachCheck(mVU->cache, 512); +} + +//------------------------------------------------------------------ +// Execution Functions +//------------------------------------------------------------------ + +// Executes for number of cycles +microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { +/* + Pseudocode: (ToDo: implement # of cycles) + 1) Search for existing program + 2) If program not found, goto 5 + 3) Search for recompiled block + 4) If recompiled block found, goto 6 + 5) Recompile as much blocks as possible + 6) Return start execution address of block +*/ + microVU* mVU = mVUx; + mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); + if ( mVUsearchProg(mVU) ) { // Found Program + //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); + //if (block) return block->x86ptrStart; // Found Block + } + // Recompile code + return NULL; +} + +//------------------------------------------------------------------ +// Cleanup 
Functions +//------------------------------------------------------------------ + +microVUt(void) mVUcleanUp() { + microVU* mVU = mVUx; + mVU->ptr = mVUcurProg.x86ptr; + mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); +} + +//------------------------------------------------------------------ +// Caller Functions +//------------------------------------------------------------------ + +void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.startFunct)(startPC, cycles); } +void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); } +void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } +void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } +void mVUcleanUpVU0() { mVUcleanUp<0>(); } +void mVUcleanUpVU1() { mVUcleanUp<1>(); } + +#endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 8e7859e033..a247f00a6c 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -23,6 +23,10 @@ // Micro VU Micromode Lower instructions //------------------------------------------------------------------ +//------------------------------------------------------------------ +// DIV/SQRT/RSQRT +//------------------------------------------------------------------ + #define testZero(xmmReg, xmmTemp, gprTemp) { \ SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \ @@ -128,6 +132,10 @@ microVUf(void) mVU_RSQRT() { } } +//------------------------------------------------------------------ +// EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM +//------------------------------------------------------------------ + #define EATANhelper(addr) { \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ @@ -401,6 +409,10 @@ 
microVUf(void) mVU_ESUM() { } } +//------------------------------------------------------------------ +// FCAND/FCEQ/FCGET/FCOR/FCSET +//------------------------------------------------------------------ + microVUf(void) mVU_FCAND() { microVU* mVU = mVUx; if (!recPass) {} @@ -456,6 +468,10 @@ microVUf(void) mVU_FCSET() { } } +//------------------------------------------------------------------ +// FMAND/FMEQ/FMOR +//------------------------------------------------------------------ + microVUf(void) mVU_FMAND() { microVU* mVU = mVUx; if (!recPass) {} @@ -491,9 +507,13 @@ microVUf(void) mVU_FMOR() { } } +//------------------------------------------------------------------ +// FSAND/FSEQ/FSOR/FSSET +//------------------------------------------------------------------ + microVUf(void) mVU_FSAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); AND16ItoR(gprT1, _Imm12_); @@ -503,7 +523,7 @@ microVUf(void) mVU_FSAND() { microVUf(void) mVU_FSEQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); XOR16ItoR(gprT1, _Imm12_); @@ -515,7 +535,7 @@ microVUf(void) mVU_FSEQ() { microVUf(void) mVU_FSOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); OR16ItoR(gprT1, _Imm12_); @@ -525,20 +545,22 @@ microVUf(void) mVU_FSOR() { microVUf(void) mVU_FSSET() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeFSSET(); } else { int flagReg; getFlagReg(flagReg, fsInstance); - MOV16ItoR(gprT1, (_Imm12_ & 0xfc0)); - //if (_Imm12_ & 0xc00) { mVUdivFlag = _Imm12_ >> 9; } - //else { mVUdivFlag = 1; } - //mVUdivFlagT = 4; + AND32ItoR(flagReg, 0x03f); + OR32ItoR(flagReg, (_Imm12_ & 0xfc0)); } } +//------------------------------------------------------------------ +// IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU 
+//------------------------------------------------------------------ + microVUf(void) mVU_IADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -552,7 +574,7 @@ microVUf(void) mVU_IADD() { microVUf(void) mVU_IADDI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm5_); @@ -562,7 +584,7 @@ microVUf(void) mVU_IADDI() { microVUf(void) mVU_IADDIU() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm12_); @@ -572,7 +594,7 @@ microVUf(void) mVU_IADDIU() { microVUf(void) mVU_IAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -585,7 +607,7 @@ microVUf(void) mVU_IAND() { microVUf(void) mVU_IOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -598,7 +620,7 @@ microVUf(void) mVU_IOR() { microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { if (_Ft_ != _Fs_) { mVUallocVIa(gprT1, _Fs_); @@ -615,7 +637,7 @@ microVUf(void) mVU_ISUB() { microVUf(void) mVU_ISUBIU() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, _Imm12_); @@ -623,18 +645,13 @@ microVUf(void) mVU_ISUBIU() { } } -microVUf(void) mVU_MOVE() { - microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } - else { - mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); - mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); - } -} +//------------------------------------------------------------------ +// 
MFIR/MFP/MOVE/MR32/MTIR +//------------------------------------------------------------------ microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); } else { mVUallocVIa(gprT1, _Fs_); MOVSX32R16toR(gprT1, gprT1); @@ -646,25 +663,25 @@ microVUf(void) mVU_MFIR() { microVUf(void) mVU_MFP() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeMFP(_Ft_); } else { getPreg(xmmFt); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } } -microVUf(void) mVU_MTIR() { +microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); } else { - MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); - mVUallocVIb(gprT1, _Ft_); + mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } } microVUf(void) mVU_MR32() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeMR32(_Fs_, _Ft_); } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 
4 : 15); if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } @@ -672,9 +689,22 @@ microVUf(void) mVU_MR32() { } } +microVUf(void) mVU_MTIR() { + microVU* mVU = mVUx; + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); } + else { + MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); + mVUallocVIb(gprT1, _Ft_); + } +} + +//------------------------------------------------------------------ +// ILW/ILWR +//------------------------------------------------------------------ + microVUf(void) mVU_ILW() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { if (!_Fs_) { MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS ); @@ -684,7 +714,7 @@ microVUf(void) mVU_ILW() { mVUallocVIa(gprT1, _Fs_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); - MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. + MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -693,25 +723,29 @@ microVUf(void) mVU_ILW() { microVUf(void) mVU_ILWR() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { if (!_Fs_) { - MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS ); + MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); mVUallocVIb(gprT1, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); mVUaddrFix(gprT1); - MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. 
+ MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); mVUallocVIb(gprT1, _Ft_); } } } +//------------------------------------------------------------------ +// ISW/ISWR +//------------------------------------------------------------------ + microVUf(void) mVU_ISW() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { if (!_Fs_) { int imm = getVUmem(_Imm11_); @@ -726,17 +760,17 @@ microVUf(void) mVU_ISW() { mVUallocVIa(gprT2, _Ft_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); - if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); - if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); - if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); - if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } } microVUf(void) mVU_ISWR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { if (!_Fs_) { mVUallocVIa(gprT1, _Ft_); @@ -749,17 +783,21 @@ microVUf(void) mVU_ISWR() { mVUallocVIa(gprT1, _Fs_); mVUallocVIa(gprT2, _Ft_); mVUaddrFix(gprT1); - if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); - if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); - if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); - if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } } +//------------------------------------------------------------------ +// LQ/LQD/LQI 
+//------------------------------------------------------------------ + microVUf(void) mVU_LQ() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 0); } else { if (!_Fs_) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); @@ -777,9 +815,9 @@ microVUf(void) mVU_LQ() { microVUf(void) mVU_LQD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { - if (!_Fs_ && _Ft_) { + if (!_Fs_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -787,7 +825,7 @@ microVUf(void) mVU_LQD() { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. - if (_Ft_) { + if (!noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -798,15 +836,15 @@ microVUf(void) mVU_LQD() { microVUf(void) mVU_LQI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { - if (!_Fs_ && _Ft_) { + if (!_Fs_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { mVUallocVIa((_Ft_) ? 
gprT1 : gprT2, _Fs_); - if (_Ft_) { + if (!noWriteVF) { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -818,9 +856,13 @@ microVUf(void) mVU_LQI() { } } +//------------------------------------------------------------------ +// SQ/SQD/SQI +//------------------------------------------------------------------ + microVUf(void) mVU_SQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 0); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -838,7 +880,7 @@ microVUf(void) mVU_SQ() { microVUf(void) mVU_SQD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -857,7 +899,7 @@ microVUf(void) mVU_SQD() { microVUf(void) mVU_SQI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -875,9 +917,13 @@ microVUf(void) mVU_SQI() { } } +//------------------------------------------------------------------ +// RINIT/RGET/RNEXT/RXOR +//------------------------------------------------------------------ + microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprR, _Fs_, _Fsf_); @@ -890,7 +936,7 @@ microVUf(void) mVU_RINIT() { microVUt(void) mVU_RGET_() { microVU* mVU = mVUx; - if (_Ft_) { + if (!noWriteVF) { if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR); if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR); if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR); @@ -900,13 +946,13 @@ microVUt(void) mVU_RGET_() { microVUf(void) mVU_RGET() { microVU* mVU = mVUx; - if (!recPass) { /*if (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeR2(_Ft_, 1); } else { mVU_RGET_(); } } microVUf(void) mVU_RNEXT() { microVU* mVU = mVUx; - if (!recPass) { /*if (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeR2(_Ft_, 0); } else { // 
algorithm from www.project-fao.org MOV32RtoR(gprT1, gprR); @@ -928,7 +974,7 @@ microVUf(void) mVU_RNEXT() { microVUf(void) mVU_RXOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprT1, _Fs_, _Fsf_); @@ -938,21 +984,27 @@ microVUf(void) mVU_RXOR() { } } +//------------------------------------------------------------------ +// WaitP/WaitQ +//------------------------------------------------------------------ + microVUf(void) mVU_WAITP() { microVU* mVU = mVUx; - if (!recPass) {} - else {} + if (!recPass) { mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); } } microVUf(void) mVU_WAITQ() { microVU* mVU = mVUx; - if (!recPass) {} - else {} + if (!recPass) { mVUstall = aMax(mVUstall, mVUregs.q); } } +//------------------------------------------------------------------ +// XTOP/XITOP +//------------------------------------------------------------------ + microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); @@ -961,13 +1013,17 @@ microVUf(void) mVU_XTOP() { microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop ); mVUallocVIb(gprT1, _Ft_); } } +//------------------------------------------------------------------ +// XGkick +//------------------------------------------------------------------ + microVUt(void) __fastcall mVU_XGKICK_(u32 addr) { microVU* mVU = mVUx; u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff)); @@ -981,61 +1037,106 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); } microVUf(void) mVU_XGKICK() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeXGkick(_Fs_, 4); } else { mVUallocVIa(gprT2, _Fs_); 
// gprT2 = ECX for __fastcall + PUSH32R(gprR); // gprR = EDX is volatile so backup if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0); else CALLFunc((uptr)mVU_XGKICK1); + POP32R(gprR); // Restore } } //------------------------------------------------------------------ -// Branches +// Branches/Jumps //------------------------------------------------------------------ microVUf(void) mVU_B() { microVU* mVU = mVUx; - mVUbranch = 1; + mVUbranch = 1; + if (!recPass) { /*mVUinfo |= _isBranch2;*/ } } microVUf(void) mVU_BAL() { microVU* mVU = mVUx; - mVUbranch = 1; - if (recPass) { - MOV32ItoR(gprT1, (xPC + (2 * 8)) & 0xffff); - mVUallocVIb(gprT1, _Ft_); - } + mVUbranch = 2; + if (!recPass) { /*mVUinfo |= _isBranch2;*/ analyzeVIreg2(_Ft_, 1); } + else {} } microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 3; + if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBGEZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 4; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + //SHR32ItoR(gprT1, 15); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBGTZ() { microVU* mVU = mVUx; - mVUbranch = 2; -} -microVUf(void) mVU_IBLTZ() { - microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 5; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBLEZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 6; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + 
else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)mVU->branch, gprT1); + } +} +microVUf(void) mVU_IBLTZ() { + microVU* mVU = mVUx; + mVUbranch = 7; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + //SHR32ItoR(gprT1, 15); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBNE() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 8; + if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_JR() { microVU* mVU = mVUx; - mVUbranch = 3; + mVUbranch = 9; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; - mVUbranch = 3; + mVUbranch = 10; + if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index ce9272f201..aaf45425d4 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -66,9 +66,9 @@ declareAllVariables //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ -#define _Ft_ ((mVU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ ((mVU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ ((mVU->code >> 6) & 0x1F) // The sa part of the instruction register +#define _Ft_ ((mVU->code >> 16) & 0x1F) // The ft/it part of the instruction register +#define _Fs_ ((mVU->code >> 11) & 0x1F) // The fs/is part of the instruction register +#define _Fd_ ((mVU->code >> 6) & 0x1F) // The fd/id part of the instruction register #define _X ((mVU->code>>24) & 0x1) #define _Y 
((mVU->code>>23) & 0x1) @@ -143,20 +143,25 @@ declareAllVariables #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles +#define mVUcount mVUallocInfo.count #define mVUstall mVUallocInfo.maxStall -#define mVUdivFlag mVUallocInfo.divFlag -#define mVUdivFlagT mVUallocInfo.divFlagTimer #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp #define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] +#define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) -#define incCycles(x) { mVUcycles += x; } +#define curI mVUcurProg.data[iPC] +#define setCode() { mVU->code = curI; } +#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } +#define incCycles(x) { mVUincCycles(x); } #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch #define _isEOB (1<<2) // End of Block #define _isBdelay (1<<3) // Cur Instruction in Branch Delay slot +#define _isSflag (1<<4) // Cur Instruction uses status flag #define _writeQ (1<<5) #define _readQ (1<<6) #define _writeP (1<<7) @@ -166,17 +171,25 @@ declareAllVariables #define _doStatus (1<<9) #define _fmInstance (3<<10) #define _fsInstance (3<<12) -#define _fcInstance (3<<14) -#define _fpmInstance (3<<10) #define _fpsInstance (3<<12) +#define _fcInstance (3<<14) +#define _fpcInstance (3<<14) #define _fvmInstance (3<<16) #define _fvsInstance (3<<18) -#define _fvcInstance (3<<14) +#define _fvcInstance (3<<20) +#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) +#define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) +#define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches) +#define _memReadIt (1<<24) // Read 
If (VI reg) from memory (used by branches) +#define _writesVI (1<<25) // Current Instruction writes to VI +#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction +//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) #define isEOB (mVUinfo & (1<<2)) #define isBdelay (mVUinfo & (1<<3)) +#define isSflag (mVUinfo & (1<<4)) #define writeQ ((mVUinfo >> 5) & 1) #define readQ ((mVUinfo >> 6) & 1) #define writeP ((mVUinfo >> 7) & 1) @@ -192,11 +205,32 @@ declareAllVariables #define fvmInstance ((mVUinfo >> 16) & 3) #define fvsInstance ((mVUinfo >> 18) & 3) #define fvcInstance ((mVUinfo >> 20) & 3) - -//#define getFs (mVUinfo & (1<<13)) -//#define getFt (mVUinfo & (1<<14)) -//#define fpmInstance (((u8)((mVUinfo & (3<<10)) >> 10) - 1) & 0x3) +#define noWriteVF (mVUinfo & (1<<21)) +#define backupVI (mVUinfo & (1<<22)) +#define memReadIs (mVUinfo & (1<<23)) +#define memReadIt (mVUinfo & (1<<24)) +#define writesVI (mVUinfo & (1<<25)) +#define swapOps (mVUinfo & (1<<26)) +//#define isBranch2 (mVUinfo & (1<<27)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) +#ifdef mVUdebug +#define mVUlog Console::Notice +#define mVUdebug1() { \ + if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ + if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ + if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ + if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ + if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ +} +#else +#define mVUlog 0&& +#define mVUdebug1() {} +#endif + +#define mVUcachCheck(start, limit) { \ + uptr diff = mVU->ptr - start; \ + if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. 
Size = %x", params diff); } \ +} diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 6977b765e1..4154048046 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) { microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { switch( xyzw ) { - case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X - case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y - case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z - case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W - default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break; + case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X + case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y + case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z + case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W + default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break; } } @@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { switch ( xyzw ) { case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // YW case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); - SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); + SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4); break; // YZ case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // YZW case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); + 
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // XW case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; //XZ - case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); + case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; //XZW case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // XYW case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; // XYZ - case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X - case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y - case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z - case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W - case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY - case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW - default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW + case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X + case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y + case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // 
Z + case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W + case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY + case 3: SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW + default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW } } @@ -251,7 +251,7 @@ microVUt(void) mVUaddrFix(int gprReg) { u8 *jmpA, *jmpB; CMP32ItoR(EAX, 0x400); jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs - AND32ItoR(EAX, 0x43f); + AND32ItoR(EAX, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! jmpB = JMP8(0); x86SetJ8(jmpA); AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 388af54658..f670531aa4 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -26,7 +26,7 @@ #pragma once -#define PLUGIN_VERSION 14 +#define PLUGIN_VERSION 15 #include "GSVector.h" diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 2ad320f9e8..c5aa8e1f88 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -349,8 +349,6 @@ protected: OverrideOutput(); - m_tc->InvalidateTextures(context->FRAME, context->ZBUF); - if(s_dump) { CString str; @@ -360,6 +358,8 @@ protected: if(s_savez) ds->m_texture.Save(str); // if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str); // TODO } + + m_tc->InvalidateTextures(context->FRAME, context->ZBUF); } virtual void Draw(int prim, Texture& rt, Texture& ds, typename GSTextureCache::GSTexture* tex) = 0; @@ -507,6 +507,35 @@ protected: #pragma endregion + #pragma region GoW2 z buffer clear + + if(m_game.title == CRC::GodOfWar2) + { + DWORD FBP = m_context->FRAME.Block(); + DWORD FBW = m_context->FRAME.FBW; + DWORD FPSM = m_context->FRAME.PSM; + + if((FBP == 0x00f00 || FBP == 0x00100) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100 + { + GIFRegTEX0 TEX0; + + TEX0.TBP0 = FBP; + TEX0.TBW = FBW; + TEX0.PSM = FPSM; + + if(GSTextureCache::GSDepthStencil* ds = m_tc->GetDepthStencil(TEX0, 
m_width, m_height)) + { + m_dev.ClearDepth(ds->m_texture, 0); + } + + return false; + } + + return true; + } + + #pragma endregion + return true; } diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index ff17150ec4..0664a1fff6 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2081,6 +2081,27 @@ bool GSC_GodOfWar(const GSFrameInfo& fi, int& skip) return true; } +bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) +{ + if(skip == 0) + { + if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16 // ntsc + || fi.TME && fi.FBP == 0x02100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT16) // pal + { + skip = 30; // shadows + } + else if(fi.TME && fi.FBP == 0x00500 && fi.FPSM == PSM_PSMCT24 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT32) // pal + { + // skip = 17; // only looks correct at native resolution + } + } + else + { + } + + return true; +} + bool GSC_GiTS(const GSFrameInfo& fi, int& skip) { if(skip == 0) @@ -2172,7 +2193,7 @@ bool GSState::IsBadFrame(int& skip) map[CRC::Tekken5] = GSC_Tekken5; map[CRC::IkkiTousen] = GSC_IkkiTousen; map[CRC::GodOfWar] = GSC_GodOfWar; - map[CRC::GodOfWar2] = GSC_GodOfWar; + map[CRC::GodOfWar2] = GSC_GodOfWar2; map[CRC::GiTS] = GSC_GiTS; map[CRC::Onimusha3] = GSC_Onimusha3; map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss; diff --git a/plugins/GSdx/GSTexture9.cpp b/plugins/GSdx/GSTexture9.cpp index 8d75cc2845..bff9df141c 100644 --- a/plugins/GSdx/GSTexture9.cpp +++ b/plugins/GSdx/GSTexture9.cpp @@ -140,8 +140,8 @@ void GSTexture9::Unmap() bool GSTexture9::Save(CString fn, bool dds) { - CComPtr res; - + CComPtr surface; + if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) { HRESULT hr; @@ -153,8 +153,6 @@ bool GSTexture9::Save(CString fn, bool dds) if(desc.Format != D3DFMT_D32F_LOCKABLE) return false; - CComPtr surface; - hr = m_dev->CreateOffscreenPlainSurface(desc.Width, desc.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, NULL); 
D3DLOCKED_RECT slr, dlr; @@ -175,24 +173,22 @@ bool GSTexture9::Save(CString fn, bool dds) m_surface->UnlockRect(); surface->UnlockRect(); - - res = surface; } else { - res = m_surface; + surface = m_surface; } - if(CComQIPtr surface = res) + if(surface != NULL) { return SUCCEEDED(D3DXSaveSurfaceToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, surface, NULL, NULL)); } - - if(CComQIPtr texture = res) +/* + if(CComQIPtr texture = surface) { return SUCCEEDED(D3DXSaveTextureToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, texture, NULL)); } - +*/ return false; } diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 99769d6c85..2e2050a573 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -1404,18 +1404,59 @@ INT_PTR CALLBACK DialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, LPARAM l if (i >= 0) { unsigned int index = (unsigned int)SendMessage(GetDlgItem(hWnd, IDC_FORCEFEEDBACK), CB_GETITEMDATA, i, 0); if (index < (unsigned int) dm->numDevices) { + Device *dev = dm->devices[index]; ForceFeedbackBinding *b; - int count = CreateEffectBinding(dm->devices[index], 0, port, slot, cmd-ID_BIG_MOTOR, &b); + int count = CreateEffectBinding(dev, 0, port, slot, cmd-ID_BIG_MOTOR, &b); if (b) { - for (int j=0; j<2 && j devices[index]->numFFAxes; j++) { - b->axes[j].force = BASE_SENSITIVITY; + int needSet = 1; + if (dev->api == XINPUT && dev->numFFAxes == 2) { + needSet = 0; + if (cmd == ID_BIG_MOTOR) { + b->axes[0].force = BASE_SENSITIVITY; + } + else { + b->axes[1].force = BASE_SENSITIVITY; + } + } + else if (dev->api == DI) { + int bigIndex=0, littleIndex=0; + int constantEffect = 0, squareEffect = 0; + int j; + for (j=0; jnumFFAxes; j++) { + // DI object instance. 0 is x-axis, 1 is y-axis. 
+ int instance = (dev->ffAxes[j].id>>8)&0xFFFF; + if (instance == 0) { + bigIndex = j; + } + else if (instance == 1) { + littleIndex = j; + } + } + for (j=0; jnumFFEffectTypes; j++) { + if (!wcsicmp(L"13541C20-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) constantEffect = j; + if (!wcsicmp(L"13541C22-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) squareEffect = j; + } + needSet = 0; + if (cmd == ID_BIG_MOTOR) { + b->axes[bigIndex].force = BASE_SENSITIVITY; + b->axes[littleIndex].force = 1; + b->effectIndex = constantEffect; + } + else { + b->axes[bigIndex].force = 1; + b->axes[littleIndex].force = BASE_SENSITIVITY; + b->effectIndex = squareEffect; + } + } + if (needSet) { + for (int j=0; j<2 && j numFFAxes; j++) { + b->axes[j].force = BASE_SENSITIVITY; + } } - } - if (count >= 0) { - PropSheet_Changed(hWndProp, hWnd); UnselectAll(hWndList); ListView_SetItemState(hWndList, count, LVIS_SELECTED, LVIS_SELECTED); } + PropSheet_Changed(hWndProp, hWnd); } } } @@ -1867,7 +1908,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L InsertMenuItemW(hMenu, index, 1, &info); } else { - info.wID = port2+2*slot2; + info.wID = port2+2*slot2+1; wsprintfW(text, L"Swap with %s", pad); InsertMenuItemW(hMenu, 0, 1, &info); } @@ -1879,12 +1920,14 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L DestroyMenu(hMenu); if (!res) break; if (res > 0) { + res--; slot2 = res / 2; port2 = res&1; PadConfig padCfgTemp = config.padConfigs[port1][slot1]; config.padConfigs[port1][slot1] = config.padConfigs[port2][slot2]; config.padConfigs[port2][slot2] = padCfgTemp; for (int i=0; inumDevices; i++) { + if (dm->devices[i]->type == IGNORE) continue; PadBindings bindings = dm->devices[i]->pads[port1][slot1]; dm->devices[i]->pads[port1][slot1] = dm->devices[i]->pads[port2][slot2]; dm->devices[i]->pads[port2][slot2] = bindings; @@ -1892,6 +1935,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int 
msg, WPARAM wParam, L } else { for (int i=0; inumDevices; i++) { + if (dm->devices[i]->type == IGNORE) continue; free(dm->devices[i]->pads[port1][slot1].bindings); for (int j=0; jdevices[i]->pads[port1][slot1].numFFBindings; j++) { free(dm->devices[i]->pads[port1][slot1].ffBindings[j].axes); diff --git a/plugins/LilyPad/KeyboardQueue.cpp b/plugins/LilyPad/KeyboardQueue.cpp index 39ad231bf3..70badb83de 100644 --- a/plugins/LilyPad/KeyboardQueue.cpp +++ b/plugins/LilyPad/KeyboardQueue.cpp @@ -1,13 +1,16 @@ // This is undoubtedly completely unnecessary. #include "KeyboardQueue.h" -static int numQueuedEvents = 0; -static keyEvent queuedEvents[20]; - // What MS calls a single process Mutex. Faster, supposedly. // More importantly, can be abbreviated, amusingly, as cSection. static CRITICAL_SECTION cSection; -static int csInitialized = 0; +static u8 csInitialized = 0; + +#define EVENT_QUEUE_LEN 16 +// Actually points one beyond the last queued event. +static u8 lastQueuedEvent = 0; +static u8 nextQueuedEvent = 0; +static keyEvent queuedEvents[EVENT_QUEUE_LEN]; void QueueKeyEvent(int key, int event) { if (!csInitialized) { @@ -15,50 +18,42 @@ void QueueKeyEvent(int key, int event) { InitializeCriticalSection(&cSection); } EnterCriticalSection(&cSection); - if (numQueuedEvents >= 15) { - // Generally shouldn't happen. - for (int i=0; i<15; i++) { - queuedEvents[i] = queuedEvents[i+5]; - } - numQueuedEvents = 15; + + // Don't queue events if escape is on top of queue. This is just for safety + // purposes when a game is killing the emulator for whatever reason. + if (nextQueuedEvent == lastQueuedEvent || + queuedEvents[nextQueuedEvent].key != VK_ESCAPE || + queuedEvents[nextQueuedEvent].evt != KEYPRESS) { + // Clear queue on escape down, bringing escape to front. May do something + // with shift/ctrl/alt and F-keys, later. 
+ if (event == KEYPRESS && key == VK_ESCAPE) { + nextQueuedEvent = lastQueuedEvent; + } + + queuedEvents[lastQueuedEvent].key = key; + queuedEvents[lastQueuedEvent].evt = event; + + lastQueuedEvent = (lastQueuedEvent + 1) % EVENT_QUEUE_LEN; + // If queue wrapped around, remove last element. + if (nextQueuedEvent == lastQueuedEvent) { + nextQueuedEvent = (nextQueuedEvent + 1) % EVENT_QUEUE_LEN; + } } - int index = numQueuedEvents; - // Move escape to top of queue. May do something - // with shift/ctrl/alt and F-keys, later. - if (event == KEYPRESS && key == VK_ESCAPE) { - while (index) { - queuedEvents[index-1] = queuedEvents[index]; - index--; - } - } - queuedEvents[index].key = key; - queuedEvents[index].evt = event; - numQueuedEvents ++; LeaveCriticalSection(&cSection); } int GetQueuedKeyEvent(keyEvent *event) { - int out = 0; - if (numQueuedEvents) { - EnterCriticalSection(&cSection); - // Shouldn't be 0, but just in case... - if (numQueuedEvents) { - *event = queuedEvents[0]; - numQueuedEvents--; - out = 1; - for (int i=0; i