--updated to r956

--uses test8 instead of test32 when possible
--exception handling checks are a bit more strict

git-svn-id: http://pcsx2.googlecode.com/svn/branches/vtlb-exp@957 96395faa-99c1-11dd-bbfe-3dabce05a288
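A quick sketch of the size win behind "uses test8 instead of test32": on a register operand, TEST r/m8, imm8 (F6 /0 ib) encodes in 3 bytes where TEST r/m32, imm32 (F7 /0 id) takes 6, so flag masks that fit in the low byte shrink the recompiled blocks. The helper below is a hypothetical illustration, not the emitter's actual API:

#include <cstdint>
#include <vector>

void emitTestEAX(std::vector<uint8_t>& code, uint32_t mask)
{
    if (mask <= 0xFF)
    {
        code.push_back(0xF6); // TEST r/m8, imm8
        code.push_back(0xC0); // ModRM: register-direct, AL
        code.push_back(static_cast<uint8_t>(mask));
    }
    else
    {
        code.push_back(0xF7); // TEST r/m32, imm32
        code.push_back(0xC0); // ModRM: register-direct, EAX
        for (int i = 0; i < 4; ++i)
            code.push_back(static_cast<uint8_t>(mask >> (8 * i)));
    }
}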
drkiiraziel 2009-04-12 02:49:23 +00:00
commit f3370ce28f
79 changed files with 8768 additions and 7640 deletions

View File

@ -55,8 +55,6 @@
// disable the default case in a switch
#define jNO_DEFAULT \
{ \
break; \
\
default: \
jASSUME(0); \
break; \
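Usage sketch: jNO_DEFAULT stands in for the default arm of a switch whose operand is already range-checked, so an "impossible" value asserts in devel builds instead of falling through silently:

switch (reg & 3)
{
    case 0: /* ... */ break;
    case 1: /* ... */ break;
    case 2: /* ... */ break;
    case 3: /* ... */ break;
    jNO_DEFAULT;
}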

View File

@ -20,11 +20,11 @@
#define __PCSX2CONFIG_H__
// Hack so that you can still use this file from C (not C++), or from a plugin without access to Paths.h.
#ifdef PLUGIN_ONLY
// .. and removed in favor of a less hackish approach (air)
#ifndef g_MaxPath
#define g_MaxPath 255
#else
#include "Paths.h"
#endif
#endif
/////////////////////////////////////////////////////////////////////////
// Session Configuration Override Flags
@ -45,6 +45,7 @@ extern SessionOverrideFlags g_Session;
// Pcsx2 User Configuration Options!
//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs
//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now)
#define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs
#define PCSX2_EEREC 0x10
#define PCSX2_VU0REC 0x20

View File

@ -29,6 +29,8 @@
#define PCSX2_VERSION "(beta)"
#include "System.h"
#include "Plugins.h"
#include "SaveState.h"
@ -40,7 +42,4 @@
#include "Elfheader.h"
#include "Patch.h"
#include "System.h"
#include "Pcsx2Config.h"
#endif /* __COMMON_H__ */

View File

@ -164,7 +164,7 @@ struct vSyncTimingInfo
static vSyncTimingInfo vSyncInfo;
static __forceinline void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame )
static void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame )
{
// Important: Cannot use floats or doubles here. The emulator changes rounding modes
// depending on user-set speedhack options, and it can break float/double code
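A minimal sketch (invented numbers, not this function's real body) of the integer-only style the comment calls for. Everything stays in 64-bit tick units and rounds with integer math, so the result cannot drift with whatever rounding mode a speedhack last selected:

#include <cstdint>

// ticks per frame, rounded to nearest, without touching FPU/SSE state
uint64_t ticksPerFrame(uint64_t tickRate, uint32_t framesPerSecond)
{
    return (tickRate + framesPerSecond / 2) / framesPerSecond;
}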
@ -270,8 +270,6 @@ u32 UpdateVSyncRate()
return (u32)m_iTicks;
}
extern u32 vu0time;
void frameLimitReset()
{
m_iStart = GetCPUTicks();
@ -282,13 +280,13 @@ void frameLimitReset()
// See the GS FrameSkip function for details on why this is here and not in the GS.
static __forceinline void frameLimit()
{
if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return;
if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off...
s64 sDeltaTime;
u64 uExpectedEnd;
u64 iEnd;
if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return;
if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off...
uExpectedEnd = m_iStart + m_iTicks;
iEnd = GetCPUTicks();
@ -465,7 +463,7 @@ __forceinline bool rcntUpdate_vSync()
return false;
}
static __forceinline void __fastcall _cpuTestTarget( int i )
static __forceinline void _cpuTestTarget( int i )
{
if (counters[i].count < counters[i].target) return;
@ -538,7 +536,7 @@ __forceinline bool rcntUpdate()
return retval;
}
static void _rcntSetGate( int index )
static __forceinline void _rcntSetGate( int index )
{
if (counters[index].mode.EnableGate)
{
@ -563,7 +561,7 @@ static void _rcntSetGate( int index )
}
// mode - 0 means hblank source, 8 means vblank source.
void __fastcall rcntStartGate(bool isVblank, u32 sCycle)
__forceinline void rcntStartGate(bool isVblank, u32 sCycle)
{
int i;
@ -624,7 +622,7 @@ void __fastcall rcntStartGate(bool isVblank, u32 sCycle)
}
// mode - 0 means hblank signal, 8 means vblank signal.
void __fastcall rcntEndGate(bool isVblank , u32 sCycle)
__forceinline void rcntEndGate(bool isVblank , u32 sCycle)
{
int i;
@ -665,7 +663,7 @@ void __fastcall rcntEndGate(bool isVblank , u32 sCycle)
// rcntUpdate, since we're being called from there anyway.
}
void __fastcall rcntWmode(int index, u32 value)
__forceinline void rcntWmode(int index, u32 value)
{
if(counters[index].mode.IsCounting) {
if(counters[index].mode.ClockSource != 0x3) {
@ -696,7 +694,7 @@ void __fastcall rcntWmode(int index, u32 value)
_rcntSet( index );
}
void __fastcall rcntWcount(int index, u32 value)
__forceinline void rcntWcount(int index, u32 value)
{
EECNT_LOG("EE Counter[%d] writeCount = %x, oldcount=%x, target=%x", index, value, counters[index].count, counters[index].target );
@ -722,7 +720,7 @@ void __fastcall rcntWcount(int index, u32 value)
_rcntSet( index );
}
void __fastcall rcntWtarget(int index, u32 value)
__forceinline void rcntWtarget(int index, u32 value)
{
EECNT_LOG("EE Counter[%d] writeTarget = %x", index, value);
@ -738,13 +736,13 @@ void __fastcall rcntWtarget(int index, u32 value)
_rcntSet( index );
}
void __fastcall rcntWhold(int index, u32 value)
__forceinline void rcntWhold(int index, u32 value)
{
EECNT_LOG("EE Counter[%d] Hold Write = %x", index, value);
counters[index].hold = value;
}
u32 __fastcall rcntRcount(int index)
__forceinline u32 rcntRcount(int index)
{
u32 ret;
@ -759,7 +757,7 @@ u32 __fastcall rcntRcount(int index)
return ret;
}
u32 __fastcall rcntCycle(int index)
__forceinline u32 rcntCycle(int index)
{
if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3))
return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate);

View File

@ -139,14 +139,14 @@ extern bool rcntUpdate_vSync();
extern bool rcntUpdate();
extern void rcntInit();
extern void __fastcall rcntStartGate(bool mode, u32 sCycle);
extern void __fastcall rcntEndGate(bool mode, u32 sCycle);
extern void __fastcall rcntWcount(int index, u32 value);
extern void __fastcall rcntWmode(int index, u32 value);
extern void __fastcall rcntWtarget(int index, u32 value);
extern void __fastcall rcntWhold(int index, u32 value);
extern u32 __fastcall rcntRcount(int index);
extern u32 __fastcall rcntCycle(int index);
extern void rcntStartGate(bool mode, u32 sCycle);
extern void rcntEndGate(bool mode, u32 sCycle);
extern void rcntWcount(int index, u32 value);
extern void rcntWmode(int index, u32 value);
extern void rcntWtarget(int index, u32 value);
extern void rcntWhold(int index, u32 value);
extern u32 rcntRcount(int index);
extern u32 rcntCycle(int index);
u32 UpdateVSyncRate();
void frameLimitReset();

View File

@ -190,6 +190,8 @@ extern bool SrcLog_GPU( const char* fmt, ... );
#define MEMCARDS_LOG 0&&
#endif
//#define VIFUNPACKDEBUG //enable unpack debugging output
#ifdef VIFUNPACKDEBUG
#define VIFUNPACK_LOG VIF_LOG
#else

View File

@ -16,11 +16,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef _PCSX2_EXCEPTIONS_H_
#define _PCSX2_EXCEPTIONS_H_
#include <stdexcept>
#include "StringUtils.h"
#pragma once
// This class provides an easy and clean method for ensuring objects are not copyable.
class NoncopyableObject
@ -380,5 +376,3 @@ namespace Exception
{}
};
}
#endif

View File

@ -33,6 +33,7 @@
//////////////////////////////////////////////////////////////////////////////////////////
// Include the STL junk that's actually handy.
#include <stdexcept>
#include <algorithm>
#include <vector>
#include <string>
@ -69,7 +70,9 @@ typedef int BOOL;
#include "zlib/zlib.h"
#include "PS2Etypes.h"
#include "MemcpyFast.h"
#include "StringUtils.h"
#include "Exceptions.h"
////////////////////////////////////////////////////////////////////
// Compiler/OS specific macros and defines -- Begin Section
@ -155,24 +158,3 @@ static __forceinline u32 timeGetTime()
# define __releaseinline __forceinline
#endif
//////////////////////////////////////////////////////////////////////////////////////////
// Emitter Instance Identifiers. If you add a new emitter, do it here also.
// Note: Currently most of the instances map back to 0, since existing dynarec code all
// shares iCore and must therefore all share the same emitter instance.
// (note: these don't really belong here per se, but it's an easy spot to use for now)
enum
{
EmitterId_R5900 = 0,
EmitterId_R3000a = EmitterId_R5900,
EmitterId_VU0micro = EmitterId_R5900,
EmitterId_VU1micro = EmitterId_R5900,
// Cotton's new microVU, which is iCore-free
EmitterId_microVU0,
EmitterId_microVU1,
// Air's eventual IopRec, which will also be iCore-free
EmitterId_R3000air,
EmitterId_Count // must always be last!
};

View File

@ -224,7 +224,7 @@ static __forceinline void _psxTestInterrupts()
}
}
void psxBranchTest()
__releaseinline void psxBranchTest()
{
if( psxTestCycle( psxNextsCounter, psxNextCounter ) )
{

View File

@ -200,7 +200,7 @@ extern R3000Acpu psxRec;
void psxReset();
void psxShutdown();
void psxException(u32 code, u32 step);
void psxBranchTest();
extern void psxBranchTest();
void psxExecuteBios();
void psxMemReset();

View File

@ -106,7 +106,7 @@ void cpuShutdown()
disR5900FreeSyms();
}
__releaseinline void __fastcall cpuException(u32 code, u32 bd)
__releaseinline void cpuException(u32 code, u32 bd)
{
cpuRegs.branch = 0; // Tells the interpreter that an exception occurred during a branch.
bool errLevel2, checkStatus;
@ -244,7 +244,7 @@ void cpuTestMissingHwInts() {
}
// sets a branch test to occur some time from an arbitrary starting point.
__forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta )
__forceinline void cpuSetNextBranch( u32 startCycle, s32 delta )
{
// typecast the conditional to signed so that things don't blow up
// if startCycle is greater than our next branch cycle.
@ -252,20 +252,18 @@ __forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta )
if( (int)(g_nextBranchCycle - startCycle) > delta )
{
g_nextBranchCycle = startCycle + delta;
return 1;
}
return 0;
}
// sets a branch to occur some time from the current cycle
__forceinline int __fastcall cpuSetNextBranchDelta( s32 delta )
__forceinline void cpuSetNextBranchDelta( s32 delta )
{
return cpuSetNextBranch( cpuRegs.cycle, delta );
cpuSetNextBranch( cpuRegs.cycle, delta );
}
// tests the cpu cycle against the given start and delta values.
// Returns true if the delta time has passed.
__forceinline int __fastcall cpuTestCycle( u32 startCycle, s32 delta )
__forceinline int cpuTestCycle( u32 startCycle, s32 delta )
{
// typecast the conditional to signed so that things don't explode
// if the startCycle is ahead of our current cpu cycle.
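Why the signed casts in cpuSetNextBranch/cpuTestCycle survive counter wrap, as a standalone sketch: subtracting two wrapped u32 cycle values and casting to s32 recovers the true small delta, where a raw unsigned compare would not:

#include <cassert>
#include <cstdint>

bool cycleElapsed(uint32_t startCycle, uint32_t current, int32_t delta)
{
    return static_cast<int32_t>(current - startCycle) >= delta;
}

int main()
{
    // start just below the wrap, current just past it: 0x20 cycles elapsed
    assert(cycleElapsed(0xFFFFFFF0u, 0x00000010u, 0x20));
    // startCycle ahead of the current cycle: not elapsed yet
    assert(!cycleElapsed(0x00000010u, 0xFFFFFFF0u, 0x20));
}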
@ -279,7 +277,7 @@ __forceinline void cpuSetBranch()
g_nextBranchCycle = cpuRegs.cycle;
}
void cpuClearInt( uint i )
__forceinline void cpuClearInt( uint i )
{
jASSUME( i < 32 );
cpuRegs.interrupt &= ~(1 << i);

View File

@ -257,14 +257,14 @@ extern void cpuInit();
extern void cpuReset(); // can throw Exception::FileNotFound.
extern void cpuShutdown();
extern void cpuExecuteBios();
extern void __fastcall cpuException(u32 code, u32 bd);
extern void cpuException(u32 code, u32 bd);
extern void cpuTlbMissR(u32 addr, u32 bd);
extern void cpuTlbMissW(u32 addr, u32 bd);
extern void cpuTestHwInts();
extern int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta );
extern int __fastcall cpuSetNextBranchDelta( s32 delta );
extern int __fastcall cpuTestCycle( u32 startCycle, s32 delta );
extern void cpuSetNextBranch( u32 startCycle, s32 delta );
extern void cpuSetNextBranchDelta( s32 delta );
extern int cpuTestCycle( u32 startCycle, s32 delta );
extern void cpuSetBranch();
extern bool _cpuBranchTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900:

View File

@ -18,8 +18,6 @@
#ifndef _R5900_OPCODETABLES_H
#define _R5900_OPCODETABLES_H
#include <string>
#include "PS2Etypes.h"
// TODO : Move these into the OpcodeTables namespace

View File

@ -451,16 +451,16 @@ void SIO_CommandWrite(u8 value,int way) {
break;
case 0x21:
// Set pad slot.
sio.mtapst = 0x21;
sio.mtapst = value;
sio.bufcount = 6; // No idea why this is 6, saved from old code.
break;
case 0x22:
// Set memcard slot.
sio.mtapst = 0x22;
sio.mtapst = value;
sio.bufcount = 6; // No idea why this is 6, saved from old code.
break;
}
// Commented out values are from original code. Break multitap in bios.
// Commented out values are from original code. They break multitap in bios.
sio.buf[sio.bufcount-1]=0;//'+';
sio.buf[sio.bufcount]=0;//'Z';
return;
@ -554,6 +554,7 @@ void InitializeSIO(u8 value)
int port = sio.GetMultitapPort();
if (!IsMtapPresent(port))
{
// If "unplug" multitap mid game, set active slots to 0.
sio.activePadSlot[port] = 0;
sio.activeMemcardSlot[port] = 0;
}

View File

@ -20,9 +20,9 @@
#define __SYSTEM_H__
#include "PS2Etypes.h"
#include "Paths.h"
#include "Pcsx2Config.h"
#include "Exceptions.h"
#include "Paths.h"
#include "MemcpyFast.h"
#include "SafeArray.h"
#include "Misc.h"

View File

@ -2508,13 +2508,23 @@ void _vuRegsMTIR(VURegs * VU, _VURegsNum *VUregsn) {
VUregsn->pipe = VUPIPE_FMAC;
VUregsn->VFwrite = 0;
VUregsn->VFread0 = _Fs_;
VUregsn->VFr0xyzw= _XYZW;
VUregsn->VFr0xyzw= 1 << (3-_Fsf_);
VUregsn->VFread1 = 0;
VUregsn->VIwrite = 1 << _Ft_;
VUregsn->VIread = GET_VF0_FLAG(_Fs_);
}
VUREGS_FTFS(MR32);
void _vuRegsMR32(VURegs * VU, _VURegsNum *VUregsn) {
VUregsn->pipe = VUPIPE_FMAC;
VUregsn->VFwrite = _Ft_;
VUregsn->VFwxyzw = _XYZW;
VUregsn->VFread0 = _Fs_;
VUregsn->VFr0xyzw= (_XYZW >> 1) | ((_XYZW << 3) & 0xf); //rotate
VUregsn->VFread1 = 0;
VUregsn->VFr1xyzw = 0xff;
VUregsn->VIwrite = 0;
VUregsn->VIread = (_Ft_ ? GET_VF0_FLAG(_Fs_) : 0);
}
void _vuRegsLQ(VURegs * VU, _VURegsNum *VUregsn) {
VUregsn->pipe = VUPIPE_FMAC;

View File

@ -25,10 +25,10 @@
#include "Vif.h"
#include "VifDma.h"
VIFregisters *_vifRegs;
u32* _vifRow = NULL, *_vifCol = NULL;
u32* _vifMaskRegs = NULL;
vifStruct *_vif;
VIFregisters *vifRegs;
u32* vifRow = NULL, *vifCol = NULL;
u32* vifMaskRegs = NULL;
vifStruct *vif;
PCSX2_ALIGNED16(u32 g_vifRow0[4]);
PCSX2_ALIGNED16(u32 g_vifCol0[4]);
@ -44,35 +44,37 @@ enum UnpackOffset
OFFSET_X = 0,
OFFSET_Y = 1,
OFFSET_Z = 2,
OFFSET_W =3
OFFSET_W = 3
};
#define spr0 ((DMACh*)&PS2MEM_HW[0xD000])
__forceinline static int _limit(int a, int max)
{
return (a > max) ? max : a;
}
static __releaseinline void writeX(u32 &dest, u32 data)
static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
{
int n;
u32 vifRowReg = getVifRowRegs(offnum);
if (_vifRegs->code & 0x10000000)
if (vifRegs->code & 0x10000000)
{
switch (_vif->cl)
switch (vif->cl)
{
case 0:
n = (_vifRegs->mask) & 0x3;
if (offnum == OFFSET_X)
n = (vifRegs->mask) & 0x3;
else
n = (vifRegs->mask >> (offnum * 2)) & 0x3;
break;
case 1:
n = (_vifRegs->mask >> 8) & 0x3;
n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
break;
case 2:
n = (_vifRegs->mask >> 16) & 0x3;
n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
break;
default:
n = (_vifRegs->mask >> 24) & 0x3;
n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
break;
}
}
@ -81,355 +83,144 @@ static __releaseinline void writeX(u32 &dest, u32 data)
switch (n)
{
case 0:
if ((_vif->cmd & 0x6F) == 0x6f)
if ((vif->cmd & 0x6F) == 0x6f)
{
dest = data;
}
else if (_vifRegs->mode == 1)
{
dest = data + _vifRegs->r0;
}
else if (_vifRegs->mode == 2)
{
_vifRegs->r0 += data;
dest = _vifRegs->r0;
}
else
else switch (vifRegs->mode)
{
case 1:
dest = data + vifRowReg;
break;
case 2:
// vifRowReg isn't used after this, or I would make it equal to dest here.
dest = setVifRowRegs(offnum, vifRowReg + data);
break;
default:
dest = data;
break;
}
break;
case 1:
dest = _vifRegs->r0;
dest = vifRowReg;
break;
case 2:
switch (_vif->cl)
{
case 0:
dest = _vifRegs->c0;
dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
break;
case 1:
dest = _vifRegs->c1;
break;
case 2:
dest = _vifRegs->c2;
break;
default:
dest = _vifRegs->c3;
case 3:
break;
}
break;
}
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data);
}
static __releaseinline void writeY(u32 &dest, u32 data)
{
int n;
if (_vifRegs->code & 0x10000000)
{
switch (_vif->cl)
{
case 0:
n = (_vifRegs->mask >> 2) & 0x3;
break;
case 1:
n = (_vifRegs->mask >> 10) & 0x3;
break;
case 2:
n = (_vifRegs->mask >> 18) & 0x3;
break;
default:
n = (_vifRegs->mask >> 26) & 0x3;
break;
}
}
else n = 0;
switch (n)
{
case 0:
if ((_vif->cmd & 0x6F) == 0x6f)
{
dest = data;
}
else if (_vifRegs->mode == 1)
{
dest = data + _vifRegs->r1;
}
else if (_vifRegs->mode == 2)
{
_vifRegs->r1 += data;
dest = _vifRegs->r1;
}
else
{
dest = data;
}
break;
case 1:
dest = _vifRegs->r1;
break;
case 2:
switch (_vif->cl)
{
case 0:
dest = _vifRegs->c0;
break;
case 1:
dest = _vifRegs->c1;
break;
case 2:
dest = _vifRegs->c2;
break;
default:
dest = _vifRegs->c3;
break;
}
break;
}
// VIF_LOG("writeY %8.8x : Mode %d, r1 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r1,data);
}
static __releaseinline void writeZ(u32 &dest, u32 data)
{
int n;
if (_vifRegs->code & 0x10000000)
{
switch (_vif->cl)
{
case 0:
n = (_vifRegs->mask >> 4) & 0x3;
break;
case 1:
n = (_vifRegs->mask >> 12) & 0x3;
break;
case 2:
n = (_vifRegs->mask >> 20) & 0x3;
break;
default:
n = (_vifRegs->mask >> 28) & 0x3;
break;
}
}
else n = 0;
switch (n)
{
case 0:
if ((_vif->cmd & 0x6F) == 0x6f)
{
dest = data;
}
else if (_vifRegs->mode == 1)
{
dest = data + _vifRegs->r2;
}
else if (_vifRegs->mode == 2)
{
_vifRegs->r2 += data;
dest = _vifRegs->r2;
}
else
{
dest = data;
}
break;
case 1:
dest = _vifRegs->r2;
break;
case 2:
switch (_vif->cl)
{
case 0:
dest = _vifRegs->c0;
break;
case 1:
dest = _vifRegs->c1;
break;
case 2:
dest = _vifRegs->c2;
break;
default:
dest = _vifRegs->c3;
break;
}
break;
}
// VIF_LOG("writeZ %8.8x : Mode %d, r2 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r2,data);
}
static __releaseinline void writeW(u32 &dest, u32 data)
{
int n;
if (_vifRegs->code & 0x10000000)
{
switch (_vif->cl)
{
case 0:
n = (_vifRegs->mask >> 6) & 0x3;
break;
case 1:
n = (_vifRegs->mask >> 14) & 0x3;
break;
case 2:
n = (_vifRegs->mask >> 22) & 0x3;
break;
default:
n = (_vifRegs->mask >> 30) & 0x3;
break;
}
}
else n = 0;
switch (n)
{
case 0:
if ((_vif->cmd & 0x6F) == 0x6f)
{
dest = data;
}
else if (_vifRegs->mode == 1)
{
dest = data + _vifRegs->r3;
}
else if (_vifRegs->mode == 2)
{
_vifRegs->r3 += data;
dest = _vifRegs->r3;
}
else
{
dest = data;
}
break;
case 1:
dest = _vifRegs->r3;
break;
case 2:
switch (_vif->cl)
{
case 0:
dest = _vifRegs->c0;
break;
case 1:
dest = _vifRegs->c1;
break;
case 2:
dest = _vifRegs->c2;
break;
default:
dest = _vifRegs->c3;
break;
}
break;
}
// VIF_LOG("writeW %8.8x : Mode %d, r3 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r3,data);
}
template <class T>
static void _UNPACKpart(u32 offnum, u32 &x, T y)
{
if (_vifRegs->offset == offnum)
{
switch (offnum)
{
case OFFSET_X:
writeX(x,y);
break;
case OFFSET_Y:
writeY(x,y);
break;
case OFFSET_Z:
writeZ(x,y);
break;
case OFFSET_W:
writeW(x,y);
break;
default:
break;
}
_vifRegs->offset++;
}
}
template <class T>
static void _UNPACKpart(u32 offnum, u32 &x, T y, int &size)
{
if (_vifRegs->offset == offnum)
{
switch (offnum)
{
case OFFSET_X:
writeX(x,y);
break;
case OFFSET_Y:
writeY(x,y);
break;
case OFFSET_Z:
writeZ(x,y);
break;
case OFFSET_W:
writeW(x,y);
break;
default:
break;
}
size--;
_vifRegs->offset++;
}
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
}
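How the unified writeXYZW indexes the mask, as a standalone sketch: MASK holds one 2-bit code per vector element, eight bits per cycle row, and rows past CL 3 reuse row 3, which is exactly the (cl, offnum) arithmetic in the switch above. Codes: 0 = write unpacked data (subject to mode), 1 = row register, 2 = column register, 3 = write-protected.

#include <algorithm>
#include <cstdint>

uint32_t maskCode(uint32_t mask, uint32_t cl, uint32_t offnum)
{
    const uint32_t row = std::min(cl, 3u); // CL 0, 1, 2; 3 and up share a row
    return (mask >> (row * 8 + offnum * 2)) & 3;
}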
template <class T>
void __fastcall UNPACK_S(u32 *dest, T *data, int size)
{
_UNPACKpart(OFFSET_X, *dest++, *data, size);
_UNPACKpart(OFFSET_Y, *dest++, *data, size);
_UNPACKpart(OFFSET_Z, *dest++, *data, size);
_UNPACKpart(OFFSET_W, *dest , *data, size);
if (_vifRegs->offset == 4) _vifRegs->offset = 0;
//S-# will always be a complete packet, no matter what. So we can skip the offset bits
writeXYZW(OFFSET_X, *dest++, *data);
writeXYZW(OFFSET_Y, *dest++, *data);
writeXYZW(OFFSET_Z, *dest++, *data);
writeXYZW(OFFSET_W, *dest , *data);
}
template <class T>
void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
{
_UNPACKpart(OFFSET_X, *dest++, *data++, size);
_UNPACKpart(OFFSET_Y, *dest++, *data--, size);
_UNPACKpart(OFFSET_Z, *dest++, *data++);
_UNPACKpart(OFFSET_W, *dest , *data);
if (_vifRegs->offset == 4) _vifRegs->offset = 0;
if (vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
if (vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
writeXYZW(vifRegs->offset, *dest++, *data);
vifRegs->offset = OFFSET_Z;
size--;
}
}
if (vifRegs->offset == OFFSET_Z)
{
writeXYZW(vifRegs->offset, *dest++, *dest-2);
vifRegs->offset = OFFSET_W;
}
if (vifRegs->offset == OFFSET_W)
{
writeXYZW(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
}
template <class T>
void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
{
_UNPACKpart(OFFSET_X, *dest++, *data++, size);
_UNPACKpart(OFFSET_Y, *dest++, *data++, size);
_UNPACKpart(OFFSET_Z, *dest++, *data++, size);
_UNPACKpart(OFFSET_W, *dest, *data);
if (_vifRegs->offset == 4) _vifRegs->offset = 0;
if(vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
if(vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Z;
size--;
}
}
if(vifRegs->offset == OFFSET_Z)
{
if (size > 0)
{
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_W;
size--;
}
}
if(vifRegs->offset == OFFSET_W)
{
//V3-# does some bizarre thing with alignment: every 6 qw of data, the W becomes 0 (strange console!)
//Ape Escape doesn't seem to like it though (what the hell?), gonna have to investigate
writeXYZW(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
}
template <class T>
void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
{
_UNPACKpart(OFFSET_X, *dest++, *data++, size);
_UNPACKpart(OFFSET_Y, *dest++, *data++, size);
_UNPACKpart(OFFSET_Z, *dest++, *data++, size);
_UNPACKpart(OFFSET_W, *dest , *data, size);
if (_vifRegs->offset == 4) _vifRegs->offset = 0;
while (size > 0)
{
writeXYZW(vifRegs->offset, *dest++, *data++);
vifRegs->offset++;
size--;
}
if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
}
void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
{
_UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3), size);
_UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2), size);
_UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7), size);
_UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8), size);
if (_vifRegs->offset == 4) _vifRegs->offset = 0;
//As with S-#, this will always be a complete packet
writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3));
writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2));
writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7));
writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8));
}
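A worked expansion of the V4-5 shifts above: each 5-bit channel of the RGBA5551 word lands in bits 3-7 of its output element and the alpha bit in bit 7, so data = 0xFFFF unpacks to R = G = B = 0xF8 and A = 0x80:

#include <cstdint>

void unpackV4_5(uint32_t d, uint32_t out[4])
{
    out[0] = (d & 0x001f) << 3; // R: bits 0-4   -> 0x00..0xF8
    out[1] = (d & 0x03e0) >> 2; // G: bits 5-9   -> 0x00..0xF8
    out[2] = (d & 0x7c00) >> 7; // B: bits 10-14 -> 0x00..0xF8
    out[3] = (d & 0x8000) >> 8; // A: bit 15     -> 0x00 or 0x80
}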
void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size)
@ -599,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer()
return ret;
}
static __forceinline int mfifoVIF1chain()
static __forceinline int mfifo_VIF1chain()
{
int ret;
@ -739,7 +530,7 @@ void vifMFIFOInterrupt()
{
g_vifCycles = 0;
if (vif1.inprogress == 1) mfifoVIF1chain();
if (vif1.inprogress == 1) mfifo_VIF1chain();
if (vif1.irq && vif1.tag.size == 0)
{

View File

@ -24,6 +24,7 @@ struct vifCycle {
u8 pad[2];
};
// r0-r3 and c0-c3 would be more manageable as arrays.
struct VIFregisters {
u32 stat;
u32 pad0[3];
@ -80,14 +81,97 @@ struct VIFregisters {
extern "C"
{
// these use cdecl for Asm code references.
extern VIFregisters *_vifRegs;
extern u32* _vifMaskRegs;
extern u32* _vifRow;
extern VIFregisters *vifRegs;
extern u32* vifMaskRegs;
extern u32* vifRow;
extern u32* _vifCol;
}
static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
{
switch (reg)
{
case 0:
vifRegs->r0 = data;
break;
case 1:
vifRegs->r1 = data;
break;
case 2:
vifRegs->r2 = data;
break;
case 3:
vifRegs->r3 = data;
break;
jNO_DEFAULT;
}
return data;
}
static __forceinline u32 getVifRowRegs(u32 reg)
{
switch (reg)
{
case 0:
return vifRegs->r0;
break;
case 1:
return vifRegs->r1;
break;
case 2:
return vifRegs->r2;
break;
case 3:
return vifRegs->r3;
break;
jNO_DEFAULT;
}
}
static __forceinline u32 setVifColRegs(u32 reg, u32 data)
{
switch (reg)
{
case 0:
vifRegs->c0 = data;
break;
case 1:
vifRegs->c1 = data;
break;
case 2:
vifRegs->c2 = data;
break;
case 3:
vifRegs->c3 = data;
break;
jNO_DEFAULT;
}
return data;
}
static __forceinline u32 getVifColRegs(u32 reg)
{
switch (reg)
{
case 0:
return vifRegs->c0;
break;
case 1:
return vifRegs->c1;
break;
case 2:
return vifRegs->c2;
break;
case 3:
return vifRegs->c3;
break;
jNO_DEFAULT;
}
}
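The array form the "more manageable as arrays" comment gestures at, sketched with hypothetical types (this is not the current VIFregisters layout). Each register is memory-mapped with 12 bytes of padding after it (the SSE asm elsewhere in this commit reads r0 at +0x100 and r1 at +0x110), so the element type has to carry that padding:

#include <cstdint>

struct VifPaddedReg { uint32_t val; uint32_t pad[3]; };

static inline uint32_t getRowReg(const VifPaddedReg rows[4], uint32_t reg)
{
    return rows[reg & 3].val; // replaces the four-way switch above
}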
#define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800])
#define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00])
#define spr0 ((DMACh*)&PS2MEM_HW[0xD000])
void dmaVIF0();
void dmaVIF1();

View File

@ -29,7 +29,7 @@
using namespace std; // for min / max
//#define VIFUNPACKDEBUG //enable unpack debugging output
#define gif ((DMACh*)&PS2MEM_HW[0xA000])
@ -37,10 +37,10 @@ using namespace std; // for min / max
extern "C"
{
// Need cdecl on these for ASM references.
extern VIFregisters *_vifRegs;
extern u32* _vifMaskRegs;
extern u32* _vifRow;
extern u32* _vifCol;
extern VIFregisters *vifRegs;
extern u32* vifMaskRegs;
extern u32* vifRow;
extern u32* vifCol;
}
PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]);
@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]);
PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]);
PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]);
extern vifStruct *_vif;
extern vifStruct *vif;
vifStruct vif0, vif1;
@ -254,57 +254,45 @@ __forceinline static int _limit(int a, int max)
static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum)
{
const VIFUnpackFuncTable *unpack;
vifStruct *vif;
VIFregisters *vifRegs;
unpack = &VIFfuncTable[ unpackType ];
if (VIFdmanum == 0)
{
vif = &vif0;
vifRegs = vif0Regs;
}
else
{
vif = &vif1;
vifRegs = vif1Regs;
}
unpack = &VIFfuncTable[ unpackType ];
switch (unpackType)
{
case 0x0:
vif->tag.addr += size * 4;
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing S-32 skip, size = %d", size);
break;
case 0x1:
vif->tag.addr += size * 8;
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing S-16 skip, size = %d", size);
break;
case 0x2:
vif->tag.addr += size * 16;
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing S-8 skip, size = %d", size);
break;
case 0x4:
vif->tag.addr += size + ((size / unpack->gsize) * 8);
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V2-32 skip, size = %d", size);
break;
case 0x5:
vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 8);
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V2-16 skip, size = %d", size);
break;
case 0x6:
vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 8);
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size);
break;
case 0x8:
vif->tag.addr += size + ((size / unpack->gsize) * 4);
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V3-32 skip, size = %d", size);
break;
case 0x9:
vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 4);
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size);
break;
case 0xA:
vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 4);
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V3-8 skip, size = %d", size);
break;
case 0xC:
@ -312,15 +300,15 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int
VIFUNPACK_LOG("Processing V4-32 skip, size = %d, CL = %d, WL = %d", size, vif1Regs->cycle.cl, vif1Regs->cycle.wl);
break;
case 0xD:
vif->tag.addr += size * 2;
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V4-16 skip, size = %d", size);
break;
case 0xE:
vif->tag.addr += size * 4;
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V4-8 skip, size = %d", size);
break;
case 0xF:
vif->tag.addr += size * 8;
vif->tag.addr += (size / unpack->gsize) * 16;
VIFUNPACK_LOG("Processing V4-5 skip, size = %d", size);
break;
default:
@ -328,87 +316,59 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int
break;
}
if ((vif->tag.addr & 0xf) == unpack->gsize)
//Append any skips into the equation
if (vifRegs->cycle.cl > vifRegs->cycle.wl)
{
vif->tag.addr += 16 - unpack->gsize;
VIFUNPACK_LOG("Old addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl);
vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16);
VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl);
}
//This is sorted out later
if((vif->tag.addr & 0xf) != (vifRegs->offset * 4))
{
VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr);
vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4);
}
}
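Worked numbers for the skip-write advance above (invented values): take V2-32, where one write consumes gsize = 8 bytes, with size = 64, CL = 4, WL = 2. The base advance is (64 / 8) * 16 = 128 bytes (one quadword per write), and the skip term adds (64 / (8 * 2)) * ((4 - 2) * 16) = 128 bytes, because after every WL writes the address jumps CL - WL quadwords:

#include <cstdint>

uint32_t skipAdvance(uint32_t size, uint32_t gsize, uint32_t cl, uint32_t wl)
{
    uint32_t adv = (size / gsize) * 16;                  // quadword per write
    if (cl > wl)
        adv += (size / (gsize * wl)) * ((cl - wl) * 16); // skipped quadwords
    return adv;
}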
static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
{
u32 *dest;
u32 unpackType;
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
vifStruct *vif;
VIFregisters *vifRegs;
VURegs * VU;
u8 *cdata = (u8*)data;
#ifdef _DEBUG
u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
#endif
_mm_prefetch((char*)data, _MM_HINT_NTA);
if (VIFdmanum == 0)
{
VU = &VU0;
vif = &vif0;
vifRegs = vif0Regs;
assert(v->addr < memsize);
}
else
{
VU = &VU1;
vif = &vif1;
vifRegs = vif1Regs;
assert(v->addr < memsize);
if (vu1MicroIsSkipping())
{
// don't process since the frame is dummy
vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
return;
}
}
dest = (u32*)(VU->Mem + v->addr);
VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
#ifdef _DEBUG
if (v->size != size)
{
VIF_LOG("*PCSX2*: warning v->size != size");
}
if ((v->addr + size*4) > memsize)
{
Console::Notice("*PCSX2*: fixme unpack overflow");
Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x",
params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr);
}
#endif
VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
// The unpack type
unpackType = v->cmd & 0xf;
if (size == 0)
{
VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
}
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
_vifRegs = (VIFregisters*)vifRegs;
_vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
_vif = vif;
_vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
ft = &VIFfuncTable[ unpackType ];
func = _vif->usn ? ft->funcU : ft->funcS;
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
@ -416,56 +376,64 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
memsize = size;
#endif
if (_vifRegs->offset > 0)
if(vif1Regs->offset != 0)
{
int destinc, unpacksize;
int unpacksize;
//This is just to make sure the alignment isn't loopy on a split packet
if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2))
{
DevCon::Error("Warning: Unpack alignment error");
}
VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
// SSE doesn't handle such small data
if (v->size != (size >> 2))
ProcessMemSkip(size, unpackType, VIFdmanum);
if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize)
VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset);
if (vifRegs->offset < (u32)ft->qsize)
{
if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
{
Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset));
}
unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset));
}
else
{
unpacksize = 0;
Console::WriteLn("Unpack align offset = 0");
}
destinc = (4 - ft->qsize) + unpacksize;
VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
func(dest, (u32*)cdata, unpacksize);
size -= unpacksize * ft->dsize;
cdata += unpacksize * ft->dsize;
vifRegs->num--;
++vif->cl;
if (vif->cl == vifRegs->cycle.wl)
{
if (vifRegs->cycle.cl != vifRegs->cycle.wl)
dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
else
dest += destinc;
vif->cl = 0;
{
vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
//dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc;
}
else
{
dest += destinc;
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
//dest += destinc;
}
vif->cl = 0;
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
return size >> 2;
}
else if (v->size != (size >> 2))
ProcessMemSkip(size, unpackType, VIFdmanum);
else
{
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
dest += (4 - ft->qsize) + unpacksize;
cdata += unpacksize * ft->dsize;
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
}
}
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
if (vif->cl != 0) //Check alignment for SSE unpacks
{
#ifdef _DEBUG
@ -474,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
int incdest;
if (vif->cl != 0)
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
{
// continuation from last stream
@ -491,22 +459,109 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if (vif->cl == vifRegs->cycle.wl)
{
dest += incdest;
vif->tag.addr += incdest * 4;
vif->cl = 0;
break;
}
dest += 4;
vif->tag.addr += 16;
}
// have to update
_vifRow[0] = _vifRegs->r0;
_vifRow[1] = _vifRegs->r1;
_vifRow[2] = _vifRegs->r2;
_vifRow[3] = _vifRegs->r3;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0;
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
if ((size >= ft->gsize) && !(v->addr&0xf))
}
}
return size>>2;
}
static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum)
{
u32 *dest;
u32 unpackType;
UNPACKFUNCTYPE func;
const VIFUnpackFuncTable *ft;
VURegs * VU;
u8 *cdata = (u8*)data;
#ifdef _DEBUG
u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
#endif
_mm_prefetch((char*)data, _MM_HINT_NTA);
if (VIFdmanum == 0)
{
VU = &VU0;
//vifRegs = vif0Regs;
assert(v->addr < memsize);
}
else
{
VU = &VU1;
//vifRegs = vif1Regs;
assert(v->addr < memsize);
if (vu1MicroIsSkipping())
{
// don't process since the frame is dummy
vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16);
return;
}
}
dest = (u32*)(VU->Mem + v->addr);
VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
// The unpack type
unpackType = v->cmd & 0xf;
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
ft = &VIFfuncTable[ unpackType ];
func = vif->usn ? ft->funcU : ft->funcS;
size <<= 2;
#ifdef _DEBUG
memsize = size;
#endif
#ifdef VIFUNPACKDEBUG
if((vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) *
((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000))
{
//Sanity Check (memory overflow)
DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000);
}
#endif
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write
{
#ifdef _DEBUG
static int s_count = 0;
#endif
if (size >= ft->gsize)
{
const UNPACKPARTFUNCTYPESSE* pfn;
int writemask;
@ -554,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle;
if(vifRegs->mode == 2)
{
//Update the reg rows for non SSE
vifRegs->r0 = vifRow[0];
vifRegs->r1 = vifRow[1];
vifRegs->r2 = vifRow[2];
vifRegs->r3 = vifRow[3];
}
// if size is left over, update the src,dst pointers
if (writemask > 0)
{
@ -561,107 +626,65 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
cdata += left * ft->gsize;
dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
vifRegs->num -= left;
_vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
}
else
{
vifRegs->num -= size / ft->gsize;
if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
}
vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
size = writemask;
_vifRegs->r0 = _vifRow[0];
_vifRegs->r1 = _vifRow[1];
_vifRegs->r2 = _vifRow[2];
_vifRegs->r3 = _vifRow[3];
}
else
{
if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available
{
// v4-32
if ((vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0))
{
vifRegs->num -= size >> 4;
memcpy_fast((u8*)dest, cdata, size);
size = 0;
return;
}
}
incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
while ((size >= ft->gsize) && (vifRegs->num > 0))
{
func(dest, (u32*)cdata, ft->qsize);
cdata += ft->gsize;
size -= ft->gsize;
vifRegs->num--;
++vif->cl;
if (vif->cl == vifRegs->cycle.wl)
{
dest += incdest;
vif->cl = 0;
}
else
{
dest += 4;
}
}
// have to update
_vifRow[0] = _vifRegs->r0;
_vifRow[1] = _vifRegs->r1;
_vifRow[2] = _vifRegs->r2;
_vifRow[3] = _vifRegs->r3;
}
// used for debugging vif
// {
// int i, j, k;
// u32* curdest = olddest;
// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w");
// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr);
// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle);
// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]);
// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3);
//
// for(i = 0; i < memsize; ) {
// for(k = 0; k < vifRegs->cycle.wl; ++k) {
// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) {
// fprintf(ftemp, "%x ", curdest[4*k+j]);
// }
// }
//
// fprintf(ftemp, "\n");
// curdest += 4*vifRegs->cycle.cl;
// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl;
// }
// fclose(ftemp);
// }
// s_count++;
if (size >= ft->dsize && vifRegs->num > 0)
{
//VIF_LOG("warning, end with size = %d", size);
/* unpack one qword */
vif->tag.addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
}
}
else
{
vifRegs->num -= size / ft->gsize;
if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
size = 0;
}
}
else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
{
//VIF_LOG("warning, end with size = %d", size);
/* unpack one qword */
vif->tag.addr += (size / ft->dsize) * 4;
func(dest, (u32*)cdata, size / ft->dsize);
size = 0;
if(vifRegs->mode == 2)
{
//Update the reg rows for SSE
vifRow[0] = vifRegs->r0;
vifRow[1] = vifRegs->r1;
vifRow[2] = vifRegs->r2;
vifRow[3] = vifRegs->r3;
}
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr);
}
}
else /* filling write */
{
VIF_LOG("VIFunpack - filling write");
if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0)
DevCon::Notice("Filling write warning! Size < packet size and CL != 0");
VIFUNPACK_LOG("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType);
while (size >= ft->gsize || vifRegs->num > 0)
while (vifRegs->num > 0)
{
if (vif->cl == vifRegs->cycle.wl)
{
@ -679,6 +702,11 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
{
vif->cl = 0;
}
if(size < ft->gsize)
{
VIF_LOG("Out of Filling write data");
break;
}
}
else
{
@ -786,17 +814,21 @@ static __forceinline void vif0UNPACK(u32 *data)
len = ((((32 >> vl) * (vn + 1)) * n) + 31) >> 5;
}
vif0.wl = 0;
vif0.cl = 0;
vif0.tag.cmd = vif0.cmd;
vif0.tag.addr &= 0xfff;
vif0.tag.size = len;
vif0Regs->offset = 0;
vifRegs = (VIFregisters*)vif0Regs;
vifMaskRegs = g_vif0Masks;
vif = &vif0;
vifRow = g_vifRow0;
}
static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size)
static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size)
{
/* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size);
/* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size);
{
FILE *f = fopen("vu1.raw", "wb");
fwrite(data, 1, size*4, f);
@ -900,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
{
if (vif0.vifpacketsize < vif0.tag.size)
{
_vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize);
vif0.tag.addr += vif0.vifpacketsize << 2;
vif0.tag.size -= vif0.vifpacketsize;
return vif0.vifpacketsize;
@ -909,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG
{
int ret;
_vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size);
ret = vif0.tag.size;
vif0.tag.size = 0;
vif0.cmd = 0;
@ -924,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
{
/* size is less than the total size, transfer is 'in pieces' */
VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum);
ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum);
vif0.tag.size -= vif0.vifpacketsize;
FreezeXMMRegs(0);
return vif0.vifpacketsize;
@ -931,15 +966,28 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK
else
{
/* we got all the data, transfer it fully */
int ret;
int ret = vif0.tag.size;
VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
ret = vif0.tag.size;
//Align data after a split transfer first
if(vif0Regs->offset != 0 || vif0.cl != 0)
{
vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
data += ret - vif0.tag.size;
if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
else
{
VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum);
vif0.tag.size = 0;
vif0.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1516,15 +1564,20 @@ static __forceinline void vif1UNPACK(u32 *data)
else
vif1.tag.addr = vif1Regs->code & 0x3ff;
vif1Regs->offset = 0;
vif1.cl = 0;
vif1.tag.addr <<= 4;
vif1.tag.cmd = vif1.cmd;
vifRegs = (VIFregisters*)vif1Regs;
vifMaskRegs = g_vif1Masks;
vif = &vif1;
vifRow = g_vifRow1;
}
static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size)
static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size)
{
/* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size);
/* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size);
{
FILE *f = fopen("vu1.raw", "wb");
fwrite(data, 1, size*4, f);
@ -1626,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
{
if (vif1.vifpacketsize < vif1.tag.size)
{
_vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize);
vif1.tag.addr += vif1.vifpacketsize << 2;
vif1.tag.size -= vif1.vifpacketsize;
return vif1.vifpacketsize;
@ -1634,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data)
else
{
int ret;
_vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size);
ret = vif1.tag.size;
vif1.tag.size = 0;
vif1.cmd = 0;
@ -1735,21 +1788,36 @@ static int __fastcall Vif1TransUnpack(u32 *data)
/* size is less than the total size, transfer is
'in pieces' */
VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum);
ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum);
vif1.tag.size -= vif1.vifpacketsize;
FreezeXMMRegs(0);
return vif1.vifpacketsize;
}
else
{
int ret;
/* we got all the data, transfer it fully */
VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
ret = vif1.tag.size;
int ret = vif1.tag.size;
if(vif1Regs->offset != 0 || vif1.cl != 0)
{
vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
data += ret - vif1.tag.size;
if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
else
{
/* we got all the data, transfer it fully */
VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
vif1.tag.size = 0;
vif1.cmd = 0;
FreezeXMMRegs(0);
return ret;
}
}
}

View File

@ -32,7 +32,7 @@ struct vifStruct {
int cmd;
int irq;
int cl;
int wl;
int qwcalign;
u8 usn;
// The next three should be boolean, and will be next time I break savestate compatibility. --arcum42

View File

@ -947,7 +947,6 @@
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="1"
PrecompiledHeaderFile="$(IntDir)\$(TargetName).pch"
/>
</FileConfiguration>
<FileConfiguration
@ -2507,6 +2506,10 @@
RelativePath="..\..\x86\microVU_Compile.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Execute.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Lower.inl"
>
@ -2912,149 +2915,36 @@
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86.inl"
RelativePath="..\..\x86\ix86\ix86_3dnow.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_3dnow.inl"
>
<FileConfiguration
Name="Devel vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_cpudetect.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_fpu.inl"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Devel vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_macros.h"
RelativePath="..\..\x86\ix86\ix86_fpu.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_mmx.inl"
RelativePath="..\..\x86\ix86\ix86_group1.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Devel vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_sse.inl"
RelativePath="..\..\x86\ix86\ix86_internal.h"
>
<FileConfiguration
Name="Debug|Win32"
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy.cpp"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Devel vm|Win32"
</File>
<File
RelativePath="..\..\x86\ix86\ix86_mmx.cpp"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug vm|Win32"
</File>
<File
RelativePath="..\..\x86\ix86\ix86_sse.cpp"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
<FileConfiguration
Name="Release vm|Win32"
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_sse_helpers.h"

View File

@ -39,6 +39,8 @@ const char* g_pRunGSState = NULL;
#define CmdSwitchIs( text ) ( stricmp( command, text ) == 0 )
extern u8 *recMem;
int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
{
const _EXCEPTION_RECORD& ExceptionRecord = *eps->ExceptionRecord;
@ -50,6 +52,7 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
// get bad virtual address
uptr addr=ExceptionRecord.ExceptionInformation[1];
u8* pcode=(u8*)ExceptionRecord.ExceptionAddress;
//this is a *hackfix* for a bug on x64 windows kernels. They do not give the correct address
//if the error is a misaligned access (they return 0)
@ -60,16 +63,17 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps )
}
u32 offset = addr-(uptr)psM;
if (addr&0x80000000)
if (addr&0x80000000 && ((pcode-recMem)<(16*1024*1024)) )
{
uptr _vtlb_HandleRewrite(u32 info,u8* ra);
u8* pcode=(u8*)ExceptionRecord.ExceptionAddress;
u32 patch_point=1;
s32 patch_point=1;
//01 C1
while(pcode[-patch_point]!=0x81 || pcode[-patch_point-1]!=0xC1 || pcode[-patch_point-2]!=0x01)
{
patch_point++;
if (patch_point>0x100)
return EXCEPTION_CONTINUE_SEARCH;
}
assert(pcode[-patch_point]==0x81);
pcode[-patch_point]=0xF;//js32, 0x81 is add32
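The stricter check this commit mentions, as a standalone sketch of the scan above: walk backwards from the faulting instruction looking for the 01 C1 / 81 byte pattern the VTLB emitter leaves behind, and hand the fault to the next handler (instead of asserting) if the marker isn't found within 0x100 bytes, meaning the fault did not come from our generated code:

#include <cstdint>

int findPatchPoint(const uint8_t* pcode)
{
    int patch_point = 1;
    while (pcode[-patch_point] != 0x81 ||
           pcode[-patch_point - 1] != 0xC1 ||
           pcode[-patch_point - 2] != 0x01)
    {
        if (++patch_point > 0x100)
            return -1; // not our code: EXCEPTION_CONTINUE_SEARCH
    }
    // pcode[-patch_point] is the 0x81 (add32) opcode byte that gets
    // rewritten to 0x0F (the first byte of js32)
    return patch_point;
}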

View File

@ -70,10 +70,27 @@ int BaseBlocks::LastIndex(u32 startpc) const
return imin;
}
BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) const
BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip)
{
// TODO
if (0 == blocks.size())
return 0;
int imin = 0, imax = blocks.size() - 1, imid;
while(imin != imax) {
imid = (imin+imax+1)>>1;
if (blocks[imid].fnptr > ip)
imax = imid - 1;
else
imin = imid;
}
if (ip < blocks[imin].fnptr ||
ip >= blocks[imin].fnptr + blocks[imin].x86size)
return 0;
return &blocks[imin];
}
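The new GetByX86 is an upper-bound binary search over blocks kept sorted by emitted start address: rounding imid up lets imin converge on the last block whose fnptr <= ip, and the final range check rejects addresses that fall in the gap past a block's x86size. A self-contained sketch:

#include <cstdint>
#include <vector>

struct Block { uintptr_t fnptr; uint32_t x86size; };

const Block* byX86(const std::vector<Block>& blocks, uintptr_t ip)
{
    if (blocks.empty()) return nullptr;
    size_t imin = 0, imax = blocks.size() - 1;
    while (imin != imax)
    {
        const size_t imid = (imin + imax + 1) / 2; // round up so imin advances
        if (blocks[imid].fnptr > ip) imax = imid - 1;
        else                         imin = imid;
    }
    const Block& b = blocks[imin];
    return (ip < b.fnptr || ip >= b.fnptr + b.x86size) ? nullptr : &b;
}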
void BaseBlocks::Link(u32 pc, uptr jumpptr)

View File

@ -18,14 +18,9 @@
#pragma once
#include "PrecompiledHeader.h"
#include <vector>
#include <map>
#include <map> // used by BaseBlockEx
#include <utility>
// used to keep block information
#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot
// Every potential jump point in the PS2's addressable memory has a BASEBLOCK
// associated with it. So that means a BASEBLOCK for every 4 bytes of PS2
// addressable memory. Yay!
@ -73,7 +68,7 @@ public:
BASEBLOCKEX* New(u32 startpc, uptr fnptr);
int LastIndex (u32 startpc) const;
BASEBLOCKEX* GetByX86(uptr ip) const;
BASEBLOCKEX* GetByX86(uptr ip);
inline int Index (u32 startpc) const
{
@ -119,7 +114,6 @@ public:
}
};
#define GET_BLOCKTYPE(b) ((b)->Type)
#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000],

View File

@ -18,9 +18,9 @@
*/
.intel_syntax noprefix
.extern _vifRegs
.extern _vifMaskRegs
.extern _vifRow
.extern vifRegs
.extern vifMaskRegs
.extern vifRow
#define VIF_ESP esp
#define VIF_SRC esi
@ -108,7 +108,7 @@
// setting up masks
#define UNPACK_Setup_Mask_SSE(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \
@ -118,7 +118,7 @@
#define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL)
#define UNPACK_Start_Setup_Mask_SSE_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \
pand XMM_ROWMASK, XMM_ROW; \
@ -129,12 +129,12 @@
#define UNPACK_Setup_Mask_SSE_0_1(CL)
#define UNPACK_Setup_Mask_SSE_1_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
// ignore CL, since vif.cycle.wl == 1
#define UNPACK_Setup_Mask_SSE_2_1(CL) \
mov VIF_TMPADDR, _vifMaskRegs; \
mov VIF_TMPADDR, vifMaskRegs; \
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \
movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \
movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \
@ -1312,9 +1312,9 @@
#pragma warning(disable:4731)
#define SAVE_ROW_REG_BASE \
mov VIF_TMPADDR, _vifRow; \
mov VIF_TMPADDR, vifRow; \
movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \
mov VIF_TMPADDR, _vifRegs; \
mov VIF_TMPADDR, vifRegs; \
movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \
psrldq XMM_ROW, 4; \
movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \
@ -1349,7 +1349,7 @@
.globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \
UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \
INIT_ARGS(); \
mov VIF_TMPADDR, _vifRegs; \
mov VIF_TMPADDR, vifRegs; \
movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \
movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \
sub VIF_INC, VIF_SAVEEBX; \

View File

@ -18,7 +18,7 @@
#include "PrecompiledHeader.h"
#include "Misc.h"
#include "System.h"
#include "iR5900.h"
#include "Vif.h"
#include "VU.h"

View File

@ -1956,14 +1956,14 @@ CPU_SSE_XMMCACHE_END
// Both Macros are 16 bytes so we can use a shift instead of a Mul instruction
#define QFSRVhelper0() { \
ajmp[0] = JMP32(0); \
x86Ptr[0] += 11; \
x86Ptr += 11; \
}
#define QFSRVhelper(shift1, shift2) { \
SSE2_PSRLDQ_I8_to_XMM(EEREC_D, shift1); \
SSE2_PSLLDQ_I8_to_XMM(t0reg, shift2); \
ajmp[shift1] = JMP32(0); \
x86Ptr[0] += 1; \
x86Ptr += 1; \
}
void recQFSRV()
@ -1982,8 +1982,8 @@ void recQFSRV()
MOV32MtoR(EAX, (uptr)&cpuRegs.sa);
SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros)
AND32I8toR(EAX, 0xf0); // This can possibly be removed but keeping it in case there's garbage in SA (cottonvibes)
ADD32ItoEAX((uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes
AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it in case there's garbage in SA (cottonvibes)
ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes
JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases)
// Case 0:
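The dispatch trick in C terms: every QFSRVhelper() expansion is padded to exactly 16 bytes of emitted code, so the shift amount indexes the case stubs with a shift rather than a multiply. A sketch, with caseBase standing in for the address of case 0:

#include <cstdint>

uintptr_t qfsrvTarget(uintptr_t caseBase, uint32_t sa)
{
    // sa = funnel-shift amount in bytes (0-15); each stub is 16 bytes,
    // mirroring the SHL 4 / AND 0xf0 / ADD / JMPR sequence above
    return caseBase + ((sa << 4) & 0xF0u);
}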
@ -2676,9 +2676,6 @@ CPU_SSE_XMMCACHE_END
recCall( Interp::PHMADH, _Rd_ );
}
////////////////////////////////////////////////////
//upper word of each doubleword in LO and HI is undocumented/undefined
//contains the NOT of the upper multiplication result (before the subtraction of the lower multiplication result)
void recPMSUBH()
{
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI)
@ -2740,12 +2737,8 @@ CPU_SSE_XMMCACHE_END
}
////////////////////////////////////////////////////
// rs = ... a1 a0
// rt = ... b1 b0
// rd = ... a1*b1 - a0*b0
// hi = ...
// lo = ... (undefined by doc)NOT(a1*b1), a1*b1 - a0*b0
//upper word of each doubleword in LO and HI is undocumented/undefined
//it contains the NOT of the upper multiplication result (before the subtraction of the lower multiplication result)
void recPHMSBH()
{
CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI)
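A scalar sketch of the behavior the comment above describes for one 64-bit lane (hypothetical helper; the real op repeats this across the 128-bit register):

#include <cstdint>

void phmsbhLane(int16_t a0, int16_t a1, int16_t b0, int16_t b1,
                int32_t& loLower, int32_t& loUpper)
{
    const int32_t hiProd = a1 * b1;
    loLower = hiProd - a0 * b0; // documented: a1*b1 - a0*b0
    loUpper = ~hiProd;          // undocumented: NOT of the upper product,
                                // taken before the subtraction
}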

View File

@ -24,6 +24,8 @@
#include "PrecompiledHeader.h"
#include "iR3000A.h"
#include "BaseblockEx.h"
#include <time.h>
#ifndef _WIN32
@ -171,7 +173,7 @@ static void iIopDumpBlock( int startpc, u8 * ptr )
#ifdef __LINUX__
// dump the asm
f = fopen( "mydump1", "wb" );
fwrite( ptr, 1, (uptr)x86Ptr[0] - (uptr)ptr, f );
fwrite( ptr, 1, (uptr)x86Ptr - (uptr)ptr, f );
fclose( f );
sprintf( command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 | cat %s - > tempdump", filename );
system( command );
@ -316,7 +318,7 @@ void _psxMoveGPRtoM(u32 to, int fromgpr)
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr)
{
if( PSX_IS_CONST1(fromgpr) )
MOV32ItoRmOffset( to, g_psxConstRegs[fromgpr], 0 );
MOV32ItoRm( to, g_psxConstRegs[fromgpr] );
else {
// check x86
MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[ fromgpr ] );
@ -647,7 +649,7 @@ static void recExecute()
//for (;;) R3000AExecute();
}
static s32 recExecuteBlock( s32 eeCycles )
static __forceinline s32 recExecuteBlock( s32 eeCycles )
{
psxBreak = 0;
psxCycleEE = eeCycles;
@ -741,7 +743,7 @@ static __forceinline u32 psxRecClearMem(u32 pc)
return upperextent - pc;
}
static void recClear(u32 Addr, u32 Size)
static __forceinline void recClearIOP(u32 Addr, u32 Size)
{
u32 pc = Addr;
while (pc < Addr + Size*4)
@ -772,7 +774,7 @@ void psxSetBranchReg(u32 reg)
_psxFlushCall(FLUSH_EVERYTHING);
iPsxBranchTest(0xffffffff, 1);
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 ));
}
void psxSetBranchImm( u32 imm )
@ -796,7 +798,7 @@ void psxSetBranchImm( u32 imm )
// So for now these are new settings that work.
// (rama)
static u32 psxScaleBlockCycles()
static __forceinline u32 psxScaleBlockCycles()
{
return s_psxBlockCycles * (CHECK_IOP_CYCLERATE ? 2 : 1);
}
@ -828,7 +830,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
if( newpc != 0xffffffff )
{
CMP32ItoM((uptr)&psxRegs.pc, newpc);
JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 6 ));
JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 6 ));
}
// Skip branch jump target here:
@ -864,7 +866,7 @@ void rpsxSYSCALL()
ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() );
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 ));
// jump target for skipping blockCycle updates
x86SetJ8(j8Ptr[0]);
@ -884,7 +886,7 @@ void rpsxBREAK()
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() );
SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 );
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 ));
x86SetJ8(j8Ptr[0]);
//if (!psxbranch) psxbranch = 2;
@ -1004,7 +1006,7 @@ void iopRecRecompile(u32 startpc)
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[_EmitterId_];
recPtr = x86Ptr;
s_pCurBlock = PSX_GETBLOCK(startpc);
@ -1025,7 +1027,7 @@ void iopRecRecompile(u32 startpc)
psxbranch = 0;
s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] );
s_pCurBlock->SetFnptr( (uptr)x86Ptr );
s_psxBlockCycles = 0;
// reset recomp state variables
@ -1160,7 +1162,7 @@ StartRecomp:
iPsxBranchTest(0xffffffff, 1);
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 ));
}
else {
if( psxbranch ) assert( !willbranch3 );
@ -1180,12 +1182,12 @@ StartRecomp:
}
}
assert( x86Ptr[0] < recMem+RECMEM_SIZE );
assert( x86Ptr < recMem+RECMEM_SIZE );
assert(x86Ptr[_EmitterId_] - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr;
assert(x86Ptr - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr - recPtr;
recPtr = x86Ptr[0];
recPtr = x86Ptr;
assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg );
@ -1198,7 +1200,7 @@ R3000Acpu psxRec = {
recResetIOP,
recExecute,
recExecuteBlock,
recClear,
recClearIOP,
recShutdown
};

View File

@ -18,12 +18,10 @@
#ifndef _R3000A_SUPERREC_
#define _R3000A_SUPERREC_
#define _EmitterId_ EmitterId_R3000a
#include "ix86/ix86.h"
#include "R3000A.h"
#include "iCore.h"
#include "BaseblockEx.h"
// Cycle penalties for particularly slow instructions.
static const int psxInstCycles_Mult = 7;

View File

@ -1258,7 +1258,7 @@ void rpsxJALR()
static void* s_pbranchjmp;
static u32 s_do32 = 0;
#define JUMPVALID(pjmp) (( x86Ptr[0] - (u8*)pjmp ) <= 0x80)
#define JUMPVALID(pjmp) (( x86Ptr - (u8*)pjmp ) <= 0x80)
void rpsxSetBranchEQ(int info, int process)
{
@ -1305,7 +1305,7 @@ void rpsxBEQ_process(int info, int process)
else
{
_psxFlushAllUnused();
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
s_do32 = 0;
psxSaveBranchState();
@ -1318,7 +1318,7 @@ void rpsxBEQ_process(int info, int process)
x86SetJ8A( (u8*)s_pbranchjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
s_do32 = 1;
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
@ -1369,7 +1369,7 @@ void rpsxBNE_process(int info, int process)
}
_psxFlushAllUnused();
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
s_do32 = 0;
rpsxSetBranchEQ(info, process);
@ -1381,7 +1381,7 @@ void rpsxBNE_process(int info, int process)
x86SetJ8A( (u8*)s_pbranchjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
s_do32 = 1;
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
@ -1423,7 +1423,7 @@ void rpsxBLTZ()
}
CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
u8* pjmp = JL8(0);
psxSaveBranchState();
@ -1435,7 +1435,7 @@ void rpsxBLTZ()
x86SetJ8A( pjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
psxLoadBranchState();
@ -1470,7 +1470,7 @@ void rpsxBGEZ()
}
CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
u8* pjmp = JGE8(0);
psxSaveBranchState();
@ -1482,7 +1482,7 @@ void rpsxBGEZ()
x86SetJ8A( pjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
psxLoadBranchState();
@ -1524,7 +1524,7 @@ void rpsxBLTZAL()
}
CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
u8* pjmp = JL8(0);
psxSaveBranchState();
@ -1538,7 +1538,7 @@ void rpsxBLTZAL()
x86SetJ8A( pjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
psxLoadBranchState();
@ -1577,7 +1577,7 @@ void rpsxBGEZAL()
}
CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
u8* pjmp = JGE8(0);
MOV32ItoM((uptr)&psxRegs.GPR.r[31], psxpc+4);
@ -1591,7 +1591,7 @@ void rpsxBGEZAL()
x86SetJ8A( pjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
psxLoadBranchState();
@ -1631,7 +1631,7 @@ void rpsxBLEZ()
_clearNeededX86regs();
CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
u8* pjmp = JLE8(0);
psxSaveBranchState();
@ -1642,7 +1642,7 @@ void rpsxBLEZ()
x86SetJ8A( pjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
psxLoadBranchState();
@ -1679,7 +1679,7 @@ void rpsxBGTZ()
_clearNeededX86regs();
CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
u8* prevx86 = x86Ptr[0];
u8* prevx86 = x86Ptr;
u8* pjmp = JG8(0);
psxSaveBranchState();
@ -1690,7 +1690,7 @@ void rpsxBGTZ()
x86SetJ8A( pjmp );
}
else {
x86Ptr[0] = prevx86;
x86SetPtr( prevx86 );
psxpc -= 4;
psxRegs.code = iopMemRead32( psxpc - 4 );
psxLoadBranchState();

View File

@ -19,13 +19,11 @@
#ifndef __IR5900_H__
#define __IR5900_H__
#define _EmitterId_ EmitterId_R5900
#include "ix86/ix86.h"
#include "ix86/ix86_sse_helpers.h"
#include "R5900.h"
#include "VU.h"
#include "iCore.h"
#include "BaseblockEx.h" // needed for recClear and stuff
// Yay! These work now! (air) ... almost (air)
#define ARITHMETICIMM_RECOMPILE

View File

@ -23,6 +23,7 @@
#include "VUmicro.h"
#include "iVUzerorec.h"
#ifndef PCSX2_MICROVU_
namespace VU0micro
{
void recAlloc()
@ -62,6 +63,34 @@ namespace VU0micro
FreezeXMMRegs(0);
}
}
#else
extern void initVUrec(VURegs* vuRegs, const int vuIndex);
extern void closeVUrec(const int vuIndex);
extern void resetVUrec(const int vuIndex);
extern void clearVUrec(u32 addr, u32 size, const int vuIndex);
extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex);
namespace VU0micro
{
void recAlloc() { initVUrec(&VU0, 0); }
void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 0); }
void recShutdown() { closeVUrec(0); }
static void recReset() { resetVUrec(0); x86FpuState = FPU_STATE; }
static void recStep() {}
static void recExecuteBlock()
{
if((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return;
FreezeXMMRegs(1);
FreezeMMXRegs(1);
runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0);
FreezeXMMRegs(0);
FreezeMMXRegs(0);
}
}
#endif
using namespace VU0micro;

View File

@ -29,7 +29,7 @@
#ifdef _DEBUG
extern u32 vudump;
#endif
#ifndef PCSX2_MICROVU_
namespace VU1micro
{
void recAlloc()
@ -121,6 +121,34 @@ namespace VU1micro
FreezeXMMRegs(0);
}
}
#else
extern void initVUrec(VURegs* vuRegs, const int vuIndex);
extern void closeVUrec(const int vuIndex);
extern void resetVUrec(const int vuIndex);
extern void clearVUrec(u32 addr, u32 size, const int vuIndex);
extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex);
namespace VU1micro
{
void recAlloc() { initVUrec(&VU1, 1); }
void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 1); }
void recShutdown() { closeVUrec(1); }
static void recReset() { resetVUrec(1); x86FpuState = FPU_STATE; }
static void recStep() {}
static void recExecuteBlock() {
if((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) return;
assert( (VU1.VI[REG_TPC].UL&7) == 0 );
FreezeXMMRegs(1);
FreezeMMXRegs(0);
runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1);
FreezeXMMRegs(0);
FreezeMMXRegs(0);
}
}
#endif
using namespace VU1micro;

View File

@ -280,6 +280,7 @@ void _recvuIALUTestStall(VURegs * VU, int reg) {
VU->ialu[i].enable = 0;
vucycle+= cycle;
_recvuTestPipes(VU, true);
}
void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) {
@ -387,7 +388,7 @@ void _recvuFlushFDIV(VURegs * VU) {
if (VU->fdiv.enable == 0) return;
cycle = VU->fdiv.Cycle - (vucycle - VU->fdiv.sCycle);
cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 or 12)
// Console::WriteLn("waiting FDIV pipe %d", params cycle);
VU->fdiv.enable = 0;
vucycle+= cycle;
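The off-by-one being corrected here: fdiv.Cycle stores the pipeline latency minus one (6 for the 7-cycle ops, 12 for the 13-cycle RSQRT, per the comment), so the remaining stall has to add the 1 back. The arithmetic, spelled out with the surrounding variables:

/* Sketch of the fixed stall computation: */
u32 elapsed   = vucycle - VU->fdiv.sCycle;    /* cycles since the op issued  */
u32 remaining = VU->fdiv.Cycle + 1 - elapsed; /* full latency minus elapsed; */
vucycle += remaining;                         /* the old code dropped the +1 */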

View File

@ -354,7 +354,7 @@ void recVUMI_IADD( VURegs *VU, int info )
if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg);
else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg);
else LEA16RRtoR(fdreg, fsreg, ftreg);
else LEA32RRtoR(fdreg, fsreg, ftreg);
MOVZX32R16toR(fdreg, fdreg); // needed since we don't know if fdreg's upper bits are 0
}
}
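The LEA16-to-LEA32 switch works because VU integer registers are only 16 bits wide: the 32-bit LEA adds the two sources without an operand-size prefix, and the MOVZX that follows discards whatever lands in the upper half. In C terms (register names hypothetical):

/* Sketch: 32-bit add, then truncate to the VU's 16-bit integer width. */
u32 wide  = (u32)vi_fs + (u32)vi_ft; /* lea fd, [fs+ft] (32-bit form)     */
u16 vi_fd = (u16)wide;               /* movzx fd, fd_16 clears bits 16-31 */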
@ -609,31 +609,31 @@ void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info)
if( x86reg >= 0 ) {
switch(_X_Y_Z_W) {
case 3: // ZW
SSE_MOVHPS_RmOffset_to_XMM(EEREC_T, x86reg, offset+8);
SSE_MOVHPS_Rm_to_XMM(EEREC_T, x86reg, offset+8);
break;
case 6: // YZ
SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0x9c);
SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0x9c);
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78);
break;
case 8: // X
SSE_MOVSS_RmOffset_to_XMM(EEREC_TEMP, x86reg, offset);
SSE_MOVSS_Rm_to_XMM(EEREC_TEMP, x86reg, offset);
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
break;
case 9: // XW
SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0xc9);
SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0xc9);
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2);
break;
case 12: // XY
SSE_MOVLPS_RmOffset_to_XMM(EEREC_T, x86reg, offset);
SSE_MOVLPS_Rm_to_XMM(EEREC_T, x86reg, offset);
break;
case 15:
if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_T, x86reg, offset);
else SSE_MOVUPSRmtoROffset(EEREC_T, x86reg, offset);
if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_T, x86reg, offset);
else SSE_MOVUPSRmtoR(EEREC_T, x86reg, offset);
break;
default:
if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
else SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_TEMP, x86reg, offset);
else SSE_MOVUPSRmtoR(EEREC_TEMP, x86reg, offset);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
break;
@ -795,15 +795,15 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
if ( _Fs_ == 0 ) {
if ( _XYZW_SS ) {
u32 c = _W ? 0x3f800000 : 0;
if ( x86reg >= 0 ) MOV32ItoRmOffset(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0))));
if ( x86reg >= 0 ) MOV32ItoRm(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0))));
else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c);
}
else {
if ( x86reg >= 0 ) {
if ( _X ) MOV32ItoRmOffset(x86reg, 0x00000000, offset);
if ( _Y ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+4);
if ( _Z ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+8);
if ( _W ) MOV32ItoRmOffset(x86reg, 0x3f800000, offset+12);
if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset);
if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4);
if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8);
if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12);
}
else {
if ( _X ) MOV32ItoM(offset, 0x00000000);
@ -818,29 +818,29 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
switch ( _X_Y_Z_W ) {
case 1: // W
SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x27);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12);
else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP);
break;
case 2: // Z
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8);
else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP);
break;
case 3: // ZW
if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8);
if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8);
else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S);
break;
case 4: // Y
SSE2_PSHUFLW_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4e);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4);
else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_TEMP);
break;
case 5: // YW
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1);
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if ( x86reg >= 0 ) {
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset+4);
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset+4);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12);
}
else {
SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S);
@ -850,14 +850,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
break;
case 6: // YZ
SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xc9);
if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4);
if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4);
else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_TEMP);
break;
case 7: // YZW
SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x93); //ZYXW
if ( x86reg >= 0 ) {
SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4);
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12);
SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12);
}
else {
SSE_MOVHPS_XMM_to_M64(offset+4, EEREC_TEMP);
@ -865,26 +865,26 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
}
break;
case 8: // X
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset);
else SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
break;
case 9: // XW
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset);
else SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12);
if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12);
else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP);
break;
case 10: //XZ
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if ( x86reg >= 0 ) {
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset);
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8);
}
else {
SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
@ -893,8 +893,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
break;
case 11: //XZW
if ( x86reg >= 0 ) {
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset);
SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset);
SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8);
}
else {
SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
@ -902,14 +902,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
}
break;
case 12: // XY
if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0);
if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0);
else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S);
break;
case 13: // XYW
SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4b); //YXZW
if ( x86reg >= 0 ) {
SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+0);
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12);
SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+0);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12);
}
else {
SSE_MOVHPS_XMM_to_M64(offset, EEREC_TEMP);
@ -919,8 +919,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
case 14: // XYZ
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if ( x86reg >= 0 ) {
SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0);
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8);
SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0);
SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8);
}
else {
SSE_MOVLPS_XMM_to_M64(offset, EEREC_S);
@ -929,11 +929,11 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
break;
case 15: // XYZW
if ( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_S, offset+0);
if( x86reg >= 0 ) SSE_MOVAPSRtoRm(x86reg, EEREC_S, offset+0);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S);
}
else {
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_S, offset+0);
if( x86reg >= 0 ) SSE_MOVUPSRtoRm(x86reg, EEREC_S, offset+0);
else {
if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S);
@ -1018,7 +1018,7 @@ void recVUMI_ILW(VURegs *VU, int info)
}
else {
int fsreg = ALLOCVI(_Fs_, MODE_READ);
MOV32RmtoROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off);
MOV32RmtoR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off);
}
}
//------------------------------------------------------------------
@ -1051,10 +1051,10 @@ void recVUMI_ISW( VURegs *VU, int info )
x86reg = recVUTransformAddr(fsreg, VU, _Fs_, imm);
if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem);
if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4);
if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8);
if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12);
if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem);
if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4);
if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8);
if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12);
}
}
//------------------------------------------------------------------
@ -1082,7 +1082,7 @@ void recVUMI_ILWR( VURegs *VU, int info )
}
else {
int fsreg = ALLOCVI(_Fs_, MODE_READ);
MOVZX32Rm16toROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off);
MOVZX32Rm16toR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off);
}
}
//------------------------------------------------------------------
@ -1109,10 +1109,10 @@ void recVUMI_ISWR( VURegs *VU, int info )
int fsreg = ALLOCVI(_Fs_, MODE_READ);
x86reg = recVUTransformAddr(fsreg, VU, _Fs_, 0);
if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem);
if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4);
if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8);
if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12);
if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem);
if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4);
if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8);
if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12);
}
}
//------------------------------------------------------------------

View File

@ -58,7 +58,7 @@ extern void iDumpVU1Registers();
#define SUPERVU_PROPAGATEFLAGS // the correct behavior of the VUs; for some reason Superman breaks gfx with it on...
#ifndef _DEBUG
#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster)
//#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) (nothing noticeable speed-wise; causes SPS in Ratchet and Clank (Nneeve))
#endif
#define SUPERVU_CHECKCONDITION 0 // has to be 0!!
@ -833,7 +833,7 @@ static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex)
SuperVURecompile();
s_recVUPtr = x86Ptr[0];
s_recVUPtr = x86Ptr;
// set the function's range
VuFunctionHeader::RANGE r;
@ -1889,7 +1889,7 @@ void VuBaseBlock::AssignVFRegs()
if( i == XMMREGS ) return; // nothing changed
}
u8* oldX86 = x86Ptr[0];
u8* oldX86 = x86Ptr;
FORIT(itinst, insts) {
@ -2060,7 +2060,7 @@ void VuBaseBlock::AssignVFRegs()
_freeXMMreg(free1);
_freeXMMreg(free2);
}
else if( regs->VIwrite & (1<<REG_P) || regs->VIwrite & (1<<REG_Q)) {
else if( regs->VIwrite & (1<<REG_P) || regs->VIwrite & (1<<REG_Q) || regs->VIread & (1<<REG_VF0_FLAG)) {
free1 = _allocTempXMMreg(XMMT_FPS, -1);
// protects against insts like esadd vf0 and sqrt vf0
if( free0 == -1 )
@ -2078,7 +2078,7 @@ void VuBaseBlock::AssignVFRegs()
}
}
assert( x86Ptr[0] == oldX86 );
assert( x86Ptr == oldX86 );
u32 analyzechildren = !(type&BLOCKTYPE_ANALYZED);
type |= BLOCKTYPE_ANALYZED;
@ -2302,10 +2302,11 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex)
//memset(recVUStack, 0, SUPERVU_STACKSIZE * 4);
// Clear allocation info to prevent bad data being used in other parts of pcsx2; doing this just in case (cottonvibes)
_initXMMregs();
_initMMXregs();
_initX86regs();
// Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2;
// not doing this because it's slow and not needed (rama)
// _initXMMregs();
// _initMMXregs();
// _initX86regs();
}
#if defined(_MSC_VER)
@ -2466,7 +2467,7 @@ static void SuperVURecompile()
AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 );
MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc);
JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 ));
JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 ));
}
// only other case is when there are two branches
else assert( (*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH );
@ -2606,11 +2607,11 @@ void SuperVUTestVU0Condition(u32 incstack)
ADD32ItoR(ESP, incstack);
//CALLFunc((u32)timeout);
JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 ));
JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 ));
x86SetJ8(ptr);
}
else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 6 ) );
else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 6 ) );
}
void VuBaseBlock::Recompile()
@ -2618,7 +2619,7 @@ void VuBaseBlock::Recompile()
if( type & BLOCKTYPE_ANALYZED ) return;
x86Align(16);
pcode = x86Ptr[0];
pcode = x86Ptr;
#ifdef _DEBUG
MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc);
@ -2726,7 +2727,7 @@ void VuBaseBlock::Recompile()
AND32ItoM( (uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu?~0x100:~0x001 ); // E flag
AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 );
if( !branch ) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc);
JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 ));
JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 ));
}
else {
@ -2868,7 +2869,7 @@ void VuBaseBlock::Recompile()
}
}
pendcode = x86Ptr[0];
pendcode = x86Ptr;
type |= BLOCKTYPE_ANALYZED;
LISTBLOCKS::iterator itchild;
@ -3569,7 +3570,7 @@ void recVUMI_BranchHandle()
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc);
MOV32ItoR(s_JumpX86, 0);
s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr[0]-1;
s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1;
if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) {
j8Ptr[1] = JMP8(0);
@ -3578,7 +3579,7 @@ void recVUMI_BranchHandle()
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION )
MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8);
MOV32ItoR(s_JumpX86, 0);
s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr[0]-1;
s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1;
x86SetJ8( j8Ptr[ 1 ] );
}
@ -3815,7 +3816,7 @@ void recVUMI_B( VURegs* vuu, s32 info )
if( s_pCurBlock->blocks.size() > 1 ) {
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
MOV32ItoR(s_JumpX86, 0);
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1;
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1;
s_UnconditionalDelay = 1;
}
@ -3841,7 +3842,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info )
if( s_pCurBlock->blocks.size() > 1 ) {
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
MOV32ItoR(s_JumpX86, 0);
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1;
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1;
s_UnconditionalDelay = 1;
}

View File

@ -5,9 +5,9 @@
.xmm
extern _vifRegs:ptr
extern _vifMaskRegs:ptr
extern _vifRow:ptr
extern vifRegs:ptr
extern vifMaskRegs:ptr
extern vifRow:ptr
extern s_TempDecompress:ptr
@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0
UNPACK_Setup_Mask_SSE macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(CL) + 16]
movdqa xmm5, [eax + 64*(CL) + 32]
movdqa xmm3, [eax + 64*(CL)]
@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL
endm
UNPACK_Start_Setup_Mask_SSE_1 macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(CL) + 16]
movdqa xmm5, [eax + 64*(CL) + 32]
pand xmm4, xmm6
@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL
UNPACK_Setup_Mask_SSE_0_1 macro CL
endm
UNPACK_Setup_Mask_SSE_1_1 macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm3, [eax + 64*(0)]
endm
UNPACK_Setup_Mask_SSE_2_1 macro CL
mov eax, [_vifMaskRegs]
mov eax, [vifMaskRegs]
movdqa xmm4, [eax + 64*(0) + 16]
movdqa xmm5, [eax + 64*(0) + 32]
movdqa xmm3, [eax + 64*(0)]
@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType
SAVE_ROW_REG_BASE macro
mov eax, [_vifRow]
mov eax, [vifRow]
movdqa [eax], xmm6
mov eax, [_vifRegs]
mov eax, [vifRegs]
movss dword ptr [eax+0100h], xmm6
psrldq xmm6, 4
movss dword ptr [eax+0110h], xmm6
@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE
push ebx
INIT_ARGS
mov eax, [_vifRegs]
mov eax, [vifRegs]
movzx ecx, byte ptr [eax + 040h]
movzx ebx, byte ptr [eax + 041h]
sub ecx, ebx

View File

@ -17,7 +17,7 @@
*/
#include "PrecompiledHeader.h"
#include "Misc.h"
#include "System.h"
#include "iR5900.h"
#include "Vif.h"
#include "VU.h"
@ -161,7 +161,7 @@ void _flushConstRegs()
zero_cnt++;
}
rewindPtr = x86Ptr[_EmitterId_];
rewindPtr = x86Ptr;
for (i = 1, j = 0; i < 32; j++ && ++i, j %= 2) {
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1<<i))
@ -178,7 +178,7 @@ void _flushConstRegs()
}
if (minusone_cnt == 1 && !zero_cnt) { // not worth it for one byte
x86Ptr[_EmitterId_] = rewindPtr;
x86SetPtr( rewindPtr );
} else {
done[0] |= done[2];
done[1] |= done[3];
@ -1050,12 +1050,12 @@ void _recMove128MtoM(u32 to, u32 from)
// fixme - see above function!
void _recMove128RmOffsettoM(u32 to, u32 offset)
{
MOV32RmtoROffset(EAX, ECX, offset);
MOV32RmtoROffset(EDX, ECX, offset+4);
MOV32RmtoR(EAX, ECX, offset);
MOV32RmtoR(EDX, ECX, offset+4);
MOV32RtoM(to, EAX);
MOV32RtoM(to+4, EDX);
MOV32RmtoROffset(EAX, ECX, offset+8);
MOV32RmtoROffset(EDX, ECX, offset+12);
MOV32RmtoR(EAX, ECX, offset+8);
MOV32RmtoR(EDX, ECX, offset+12);
MOV32RtoM(to+8, EAX);
MOV32RtoM(to+12, EDX);
}
@ -1065,12 +1065,12 @@ void _recMove128MtoRmOffset(u32 offset, u32 from)
{
MOV32MtoR(EAX, from);
MOV32MtoR(EDX, from+4);
MOV32RtoRmOffset(ECX, EAX, offset);
MOV32RtoRmOffset(ECX, EDX, offset+4);
MOV32RtoRm(ECX, EAX, offset);
MOV32RtoRm(ECX, EDX, offset+4);
MOV32MtoR(EAX, from+8);
MOV32MtoR(EDX, from+12);
MOV32RtoRmOffset(ECX, EAX, offset+8);
MOV32RtoRmOffset(ECX, EDX, offset+12);
MOV32RtoRm(ECX, EAX, offset+8);
MOV32RtoRm(ECX, EDX, offset+12);
}
static PCSX2_ALIGNED16(u32 s_ones[2]) = {0xffffffff, 0xffffffff};

View File

@ -30,6 +30,9 @@
#include "iR5900Jump.h"
#include "iR5900LoadStore.h"
#include "iR5900Move.h"
#include "BaseblockEx.h"
#include "iMMI.h"
#include "iFPU.h"
#include "iCOP0.h"
@ -73,7 +76,7 @@ u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
static const int RECSTACK_SIZE = 0x00010000;
static const int EE_NUMBLOCKS = (1<<15);
static u8 *recMem = NULL; // the recompiled blocks will be here
u8 *recMem = NULL; // the recompiled blocks will be here
static u8* recStack = NULL; // stack mem
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
@ -128,11 +131,14 @@ static void iDumpBlock( int startpc, u8 * ptr )
Console::Status( "dump1 %x:%x, %x", params startpc, pc, cpuRegs.cycle );
Path::CreateDirectory( "dumps" );
#ifndef __LINUX__
ssprintf( filename, "dumps\\R5900dump%.8X.txt", startpc );
#else
ssprintf( filename, "dumps/R5900dump%.8X.txt", startpc );
#endif
fflush( stdout );
// f = fopen( "dump1", "wb" );
// fwrite( ptr, 1, (u32)x86Ptr[0] - (u32)ptr, f );
// fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f );
// fclose( f );
//
// sprintf( command, "objdump -D --target=binary --architecture=i386 dump1 > %s", filename );
@ -367,7 +373,7 @@ void _eeMoveGPRtoM(u32 to, int fromgpr)
void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
{
if( GPR_IS_CONST1(fromgpr) )
MOV32ItoRmOffset( to, g_cpuConstRegs[fromgpr].UL[0], 0 );
MOV32ItoRm( to, g_cpuConstRegs[fromgpr].UL[0] );
else {
int mmreg;
@ -380,7 +386,7 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
}
else {
MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] );
MOV32RtoRm(to, EAX );
MOV32RtoRm( to, EAX );
}
}
}
@ -579,8 +585,8 @@ void recResetEE( void )
// so a fix will have to wait until later. -_- (air)
//x86SetPtr(recMem+REC_CACHEMEM);
//dyna_block_discard_recmem=(u8*)x86Ptr[0];
//JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr[0] + 5 ));
//dyna_block_discard_recmem=(u8*)x86Ptr;
//JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 ));
x86SetPtr(recMem);
@ -677,7 +683,7 @@ static void __naked DispatcherReg()
}
}
__forceinline void recExecute()
void recExecute()
{
// Optimization note : Compared pushad against manually pushing the regs one-by-one.
// Manually pushing is faster, especially on Core2's and such. :)
@ -791,7 +797,7 @@ void recSYSCALL( void ) {
CMP32ItoM((uptr)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles());
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 5 ));
JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 ));
x86SetJ8(j8Ptr[0]);
//branch = 2;
}
@ -1148,7 +1154,7 @@ static void iBranchTest(u32 newpc, bool noDispatch)
if (!noDispatch) {
if (newpc == 0xffffffff)
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 ));
JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 ));
else
iBranch(newpc, 1);
}
@ -1379,7 +1385,7 @@ void recRecompile( const u32 startpc )
x86SetPtr( recPtr );
x86Align(16);
recPtr = x86Ptr[_EmitterId_];
recPtr = x86Ptr;
s_pCurBlock = PC_GETBLOCK(startpc);
@ -1732,8 +1738,11 @@ StartRecomp:
if (bit==31)
{
vtlb_alloc_bits[writen_start]&=~mask;
if ((u8)mask==mask)
TEST8ItoM((uptr)&vtlb_alloc_bits[writen_start],mask);
else
TEST32ItoM((uptr)&vtlb_alloc_bits[writen_start],mask);
JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 ));
JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 ));
SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
mask=0;
}
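The (u8)mask==mask guard picks the narrowest TEST encoding: it is true exactly when every set bit of the dirty-page mask fits in the low byte, in which case the imm8 form saves three immediate bytes. A standalone sketch of the width check, using the project's u8/u32 typedefs:

/* Sketch: true exactly when mask < 0x100, i.e. the immediate fits the
   TEST r/m8, imm8 encoding (imm8 instead of imm32).                   */
static inline int fits_imm8(u32 mask)
{
    return (u8)mask == mask; /* 0x000000F0 -> yes, 0x0000F000 -> no */
}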
@ -1755,8 +1764,11 @@ StartRecomp:
if (mask)
{
vtlb_alloc_bits[writen_start]&=~mask;
if ((u8)mask==mask)
TEST8ItoM((uptr)&vtlb_alloc_bits[writen_start],mask);
else
TEST32ItoM((uptr)&vtlb_alloc_bits[writen_start],mask);
JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 ));
JNZ32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 ));
SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
mask=0;
}
@ -1768,14 +1780,14 @@ StartRecomp:
{
// was dyna_block_discard_recmem. See note in recResetEE for details.
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc));
JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 ));
JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 ));
stg-=4;
lpc+=4;
}
*/
DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params
startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4);
//DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params
// startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4);
}
}
inpage_ptr+=pgsz;
@ -1855,14 +1867,14 @@ StartRecomp:
}
}
assert( x86Ptr[0] < recMem+REC_CACHEMEM );
assert( x86Ptr < recMem+REC_CACHEMEM );
assert( recStackPtr < recStack+RECSTACK_SIZE );
assert( x86FpuState == 0 );
assert(x86Ptr[_EmitterId_] - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr;
assert(x86Ptr - recPtr < 0x10000);
s_pCurBlockEx->x86size = x86Ptr - recPtr;
recPtr = x86Ptr[0];
recPtr = x86Ptr;
assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );

View File

@ -1930,7 +1930,7 @@ void recLQC2( void )
dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0);
if( _Ft_ ) {
u8* rawreadptr = x86Ptr[0];
u8* rawreadptr = x86Ptr;
if( mmreg >= 0 ) {
SSEX_MOVDQARmtoROffset(mmreg, ECX, PS2MEM_BASE_+s_nAddMemOffset);
@ -1945,7 +1945,7 @@ void recLQC2( void )
// check if writing to VUs
CMP32ItoR(ECX, 0x11000000);
JAE8(rawreadptr - (x86Ptr[0]+2));
JAE8(rawreadptr - (x86Ptr+2));
PUSH32I( (int)&VU0.VF[_Ft_].UD[0] );
CALLFunc( (int)recMemRead128 );
@ -1999,7 +1999,7 @@ void recSQC2( void )
mmregs = _eePrepareReg(_Rs_);
dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0);
rawreadptr = x86Ptr[0];
rawreadptr = x86Ptr;
if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) {
SSEX_MOVDQARtoRmOffset(ECX, mmreg, PS2MEM_BASE_+s_nAddMemOffset);
@ -2039,7 +2039,7 @@ void recSQC2( void )
// check if writing to VUs
CMP32ItoR(ECX, 0x11000000);
JAE8(rawreadptr - (x86Ptr[0]+2));
JAE8(rawreadptr - (x86Ptr+2));
// some type of hardware write
if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) {
@ -2101,7 +2101,7 @@ void recLoad64( u32 bits, bool sign )
if ( _Imm_ != 0 )
ADD32ItoR( ECX, _Imm_ );
if( bits == 128 ) // force 16 byte alignment on 128 bit reads
AND32I8toR(ECX,0xF0);
AND32ItoR(ECX,~0x0F); // emitter automatically encodes this as an 8-bit sign-extended imm8
_eeOnLoadWrite(_Rt_);
EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension
@ -2198,7 +2198,7 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false)
if ( _Imm_ != 0 )
ADD32ItoR(ECX, _Imm_);
if (sz==128)
AND32I8toR(ECX,0xF0);
AND32ItoR(ECX,~0x0F);
vtlb_DynGenWrite(sz);
}
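Both the 128-bit load and store paths force 16-byte alignment by clearing the low four address bits; ~0x0F is 0xFFFFFFF0, which (as the comment notes) still encodes as a sign-extended imm8, so the new spelling costs nothing over the old AND32I8toR. A quick sketch:

/* ~0x0F == 0xFFFFFFF0: rounds the address down to a 16-byte boundary.
   Encodes as AND r/m32, imm8 (83 /4 F0).                              */
u32 aligned = addr & ~0x0Fu; /* e.g. 0x00123457 -> 0x00123450 */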

View File

@ -23,6 +23,7 @@
#include "iCore.h"
#include "iR5900.h"
#include "x86\ix86\ix86_internal.h"
u8* code_pos=0;
u8* code_start=0;
@ -63,7 +64,7 @@ void execuCode(bool set)
SysPrintf("Leaking 2 megabytes of ram\n");
code_start=code_pos=(u8*)VirtualAlloc(0,2*1024*1024,MEM_COMMIT,PAGE_EXECUTE_READWRITE);
code_sz+=2*1024*1024;
int i=0;
u32 i=0;
while(i<code_sz)
{
//UD2 is 0x0F 0x0B. Fill the stream with it so that the CPU doesn't try to execute past branches...
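Padding the fresh buffer this way means any stray jump into not-yet-emitted space raises #UD immediately instead of decoding leftover bytes. A sketch of the fill loop (using the buffer allocated just above):

/* Fill the whole buffer with UD2 (0F 0B) so bad control flow faults fast. */
for (u32 i = 0; i + 1 < code_sz; i += 2)
{
    code_start[i]     = 0x0F;
    code_start[i + 1] = 0x0B;
}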
@ -87,11 +88,11 @@ void execuCode(bool set)
u8* IndirectPlaceholderA()
{
//Add32 <eax>,imm, 6 bytes form.
write8<_EmitterId_>( 0x81 );
ModRM<_EmitterId_>( 3, 0, EAX );
write8( 0x81 );
ModRM( 3, 0, EAX );
u8* rv=x86SetPtr(0);
write32<_EmitterId_>(0);
write32(0);
return rv;
}
@ -106,10 +107,10 @@ void IndirectPlaceholderB(u8* pl,bool read,u32 sz,bool sx)
u8* old=x86SetPtr(pl);
inf.skip=old-pl-4;
//Add32 <eax>,imm, 6 bytes form, patch the imm value
write32<_EmitterId_>( inf.full );
write32( inf.full );
x86SetPtr(old);
}
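Together these two routines implement late-bound immediates: IndirectPlaceholderA emits the 6-byte add eax, imm32 form and returns a pointer to its 4 immediate bytes, and IndirectPlaceholderB later rewinds the emitter to that spot, writes the now-known value, and restores the cursor. A generic sketch of the pattern (helper names hypothetical; this assumes, as the code above suggests, that x86SetPtr returns the previous cursor and that calling it with 0 merely queries the current one):

static u8* reserve_imm32(void)               /* cf. IndirectPlaceholderA */
{
    write8( 0x81 );                          /* add r/m32, imm32 (6-byte form) */
    ModRM( 3, 0, EAX );                      /*   /0 = ADD, rm = eax           */
    u8* slot = x86SetPtr(0);                 /* remember where the imm lives   */
    write32( 0 );                            /* placeholder, patched later     */
    return slot;
}

static void patch_imm32(u8* slot, u32 value) /* cf. IndirectPlaceholderB */
{
    u8* saved = x86SetPtr(slot);             /* rewind to the reserved slot */
    write32( value );                        /* fill in the real immediate  */
    x86SetPtr(saved);                        /* restore the emit cursor     */
}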
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
PCSX2_ALIGNED16( extern u64 g_globalXMMData[2*XMMREGS] );
void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAddr=0,bool half=false )
{
int reg;
@ -130,24 +131,24 @@ void MOVx_SSE( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstAd
if (srcAddr)
SSE_MOVLPS_M64_to_XMM(reg,srcAddr);
else
SSE_MOVLPS_RmOffset_to_XMM(reg,srcRm,0);
SSE_MOVLPS_Rm_to_XMM(reg,srcRm);
if (dstAddr)
SSE_MOVLPS_XMM_to_M64(dstAddr,reg);
else
SSE_MOVLPS_XMM_to_RmOffset(destRm,reg,0);
SSE_MOVLPS_XMM_to_Rm(destRm,reg);
}
else
{
if (srcAddr)
SSE2_MOVDQA_M128_to_XMM(reg,srcAddr);
else
SSE2_MOVDQARmtoROffset(reg,srcRm,0);
SSE2_MOVDQARmtoR(reg,srcRm);
if (dstAddr)
SSE2_MOVDQA_XMM_to_M128(dstAddr,reg);
else
SSE2_MOVDQARtoRmOffset(destRm,reg,0);
SSE2_MOVDQARtoRm(destRm,reg);
}
@ -167,12 +168,12 @@ void MOV64_MMX( x86IntRegType destRm, x86IntRegType srcRm,u32 srcAddr=0,u32 dstA
if (srcAddr)
MOVQMtoR(freereg,srcAddr);
else
MOVQRmtoROffset(freereg,srcRm,0);
MOVQRmtoR(freereg,srcRm);
if (dstAddr)
MOVQRtoM(dstAddr,freereg);
else
MOVQRtoRmOffset(destRm,freereg,0);
MOVQRtoRm(destRm,freereg);
_freeMMXreg(freereg);
}
@ -482,7 +483,6 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
bits_base-=(alloc_base>>4)/8;//in bytes
BTS32MtoR(bits_base,ECX);
// BTS_wtf(asdasd,ECX);
}
static void _vtlb_DynGen_IndirectWrite( u32 bits )
@ -614,8 +614,7 @@ uptr _vtlb_HandleRewrite(u32 info,u8* ra)
u32 skip=GenIndirectMemOp(info);
JMP32(ra-x86Ptr[_EmitterId_]-5+skip);
JMP32(ra-x86Ptr-5+skip);
execuCode(false);
return rv;

View File

@ -1,4 +1,5 @@
INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty
noinst_LIBRARIES = libix86.a
libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h
libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_group1.cpp \
ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h

View File

@ -27,15 +27,465 @@
#include "PrecompiledHeader.h"
#include "System.h"
#include "ix86.h"
#include "ix86_internal.h"
u8 *x86Ptr[EmitterId_Count];
u8 *j8Ptr[32];
u32 *j32Ptr[32];
__threadlocal u8 *x86Ptr;
__threadlocal u8 *j8Ptr[32];
__threadlocal u32 *j32Ptr[32];
PCSX2_ALIGNED16(u32 p[4]);
PCSX2_ALIGNED16(u32 p2[4]);
PCSX2_ALIGNED16(float f[4]);
XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT };
namespace x86Emitter {
const x86IndexerType ptr;
//////////////////////////////////////////////////////////////////////////////////////////
//
const x86Register32 x86Register32::Empty( -1 );
const x86Register32 eax( 0 );
const x86Register32 ebx( 3 );
const x86Register32 ecx( 1 );
const x86Register32 edx( 2 );
const x86Register32 esi( 6 );
const x86Register32 edi( 7 );
const x86Register32 ebp( 5 );
const x86Register32 esp( 4 );
const x86Register16 ax( 0 );
const x86Register16 bx( 3 );
const x86Register16 cx( 1 );
const x86Register16 dx( 2 );
const x86Register16 si( 6 );
const x86Register16 di( 7 );
const x86Register16 bp( 5 );
const x86Register16 sp( 4 );
const x86Register8 al( 0 );
const x86Register8 cl( 1 );
const x86Register8 dl( 2 );
const x86Register8 bl( 3 );
const x86Register8 ah( 4 );
const x86Register8 ch( 5 );
const x86Register8 dh( 6 );
const x86Register8 bh( 7 );
//////////////////////////////////////////////////////////////////////////////////////////
// x86Register Method Implementations
//
x86ModRm x86Register32::operator+( const x86Register32& right ) const
{
return x86ModRm( *this, right );
}
x86ModRm x86Register32::operator+( const x86ModRm& right ) const
{
return right + *this;
}
x86ModRm x86Register32::operator+( s32 right ) const
{
return x86ModRm( *this, right );
}
x86ModRm x86Register32::operator*( u32 right ) const
{
return x86ModRm( Empty, *this, right );
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86ModRm Method Implementations
//
x86ModRm& x86ModRm::Add( const x86IndexReg& src )
{
if( src == Index )
{
Factor++;
}
else if( src == Base )
{
// Compound the existing register reference into the Index/Scale pair.
Base = x86IndexReg::Empty;
if( src == Index )
Factor++;
else
{
jASSUME( Index.IsEmpty() ); // or die if we already have an index!
Index = src;
Factor = 2;
}
}
else if( Base.IsEmpty() )
Base = src;
else if( Index.IsEmpty() )
Index = src;
else
assert( false ); // oops, only 2 regs allowed per ModRm!
return *this;
}
x86ModRm& x86ModRm::Add( const x86ModRm& src )
{
Add( src.Base );
Add( src.Displacement );
// If the factor is 1, we can just treat index like a base register also.
if( src.Factor == 1 )
{
Add( src.Index );
}
else if( Index.IsEmpty() )
{
Index = src.Index;
Factor = 1;
}
else if( Index == src.Index )
Factor++;
else
assert( false ); // oops, only 2 regs allowed!
return *this;
}
//////////////////////////////////////////////////////////////////////////////////////////
// ModSib Method Implementations
//
// ------------------------------------------------------------------------
// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values.
// Necessary because by default ModSib compounds registers into Index when possible.
//
void ModSib::Reduce()
{
// If no index reg, then load the base register into the index slot.
if( Index.IsEmpty() )
{
Index = Base;
Scale = 0;
Base = x86IndexReg::Empty;
return;
}
// The Scale has a series of valid forms, all shown here:
switch( Scale )
{
case 0: break;
case 1: Scale = 0; break;
case 2: Scale = 1; break;
case 3: // becomes [reg*2+reg]
jASSUME( Base.IsEmpty() );
Base = Index;
Scale = 1;
break;
case 4: Scale = 2; break;
case 5: // becomes [reg*4+reg]
jASSUME( Base.IsEmpty() );
Base = Index;
Scale = 2;
break;
case 6: // invalid!
assert( false );
break;
case 7: // so invalid!
assert( false );
break;
case 8: Scale = 3; break;
case 9: // becomes [reg*8+reg]
jASSUME( Base.IsEmpty() );
Base = Index;
Scale = 3;
break;
}
}
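Reduce() normalizes user-written scale factors into the two-bit SIB field: powers of two collapse to their log2, while 3, 5, and 9 become reg + reg*2/4/8 by moving the index into the (asserted-empty) base. Worked examples:

/* What Reduce() produces for a few inputs (hypothetical operands):
     [ebx]    -> Base=empty, Index=ebx, Scale=0  (base moved to index slot)
     [eax*2]  -> Base=empty, Index=eax, Scale=1
     [eax*5]  -> Base=eax,   Index=eax, Scale=2  (reg + reg*4)
     [eax*9]  -> Base=eax,   Index=eax, Scale=3  (reg + reg*8)
     [eax*6]  -> assertion failure: not encodable in SIB             */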
ModSib::ModSib( const x86ModRm& src ) :
Base( src.Base ),
Index( src.Index ),
Scale( src.Factor ),
Displacement( src.Displacement )
{
Reduce();
}
ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) :
Base( base ),
Index( index ),
Scale( scale ),
Displacement( displacement )
{
Reduce();
}
ModSib::ModSib( s32 displacement ) :
Base(),
Index(),
Scale(0),
Displacement( displacement )
{
}
// ------------------------------------------------------------------------
// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the
// instruction can be encoded as ModRm alone.
bool NeedsSibMagic( const ModSib& info )
{
// no registers? no sibs!
if( info.Index.IsEmpty() ) return false;
// A scaled register needs a SIB
if( info.Scale != 0 ) return true;
// two registers needs a SIB
if( !info.Base.IsEmpty() ) return true;
// If index register is ESP, then we need a SIB:
// (the ModSib::Reduce() ensures that stand-alone ESP will be in the
// index position for us)
if( info.Index == esp ) return true;
return false;
}
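Put concretely: bare [reg] and [disp32] forms fit in ModRM alone, while a scale, a second register, or ESP as the lone register forces the extra SIB byte (rm=100b is reserved as the SIB escape, so ESP can never sit in the rm field by itself):

/* NeedsSibMagic over a few reduced forms:
     [disp32]   -> false  (ModRM alone, mod=00 rm=101b)
     [eax]      -> false
     [eax+8]    -> false  (disp8/disp32 folds into the mod bits)
     [eax*2]    -> true   (nonzero scale)
     [eax+ebx]  -> true   (two registers)
     [esp]      -> true   (rm=100b is the SIB escape)                */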
// ------------------------------------------------------------------------
// Conditionally generates Sib encoding information!
//
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
void EmitSibMagic( int regfield, const ModSib& info )
{
int displacement_size = (info.Displacement == 0) ? 0 :
( ( info.IsByteSizeDisp() ) ? 1 : 2 );
if( !NeedsSibMagic( info ) )
{
// Use ModRm-only encoding, with the rm field holding an index/base register, if
// one has been specified. If neither register is specified then use Disp32 form,
// which is encoded as "EBP w/o displacement" (which is why EBP must always be
// encoded *with* a displacement of 0, if it would otherwise not have one).
if( info.Index.IsEmpty() )
ModRM( 0, regfield, ModRm_UseDisp32 );
else
{
if( info.Index == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM( displacement_size, regfield, info.Index.Id );
}
}
else
{
// In order to encode "just" index*scale (and no base), we have to encode
// it as a special [index*scale + displacement] form, which is done by
// specifying EBP as the base register and setting the displacement field
// to zero. (same as ModRm w/o SIB form above, basically, except the
// ModRm_UseDisp flag is specified in the SIB instead of the ModRM field).
if( info.Base.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 );
displacement_size = 2;
}
else
{
if( info.Base == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM( displacement_size, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, info.Base.Id );
}
}
switch( displacement_size )
{
case 0: break;
case 1: write8( info.Displacement ); break;
case 2: write32( info.Displacement ); break;
jNO_DEFAULT
}
}
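The two ebp special cases exist because mod=00 with a base/rm of 101b does not mean [ebp] at all; it selects the disp32 form instead, so a bare [ebp] must be emitted as [ebp+0] with a one-byte displacement. At the byte level:

/* mov eax, [ebp] cannot use mod=00; it becomes [ebp+disp8=0]:
     8B 45 00             mov eax, [ebp+0]
   whereas mod=00 with rm=101b is the absolute disp32 form:
     8B 05 44 33 22 11    mov eax, [0x11223344]                      */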
// ------------------------------------------------------------------------
// Conditionally generates Sib encoding information!
//
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info )
{
EmitSibMagic( regfield.Id, info );
}
template< typename ToReg >
static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false )
{
int displacement_size = (src.Displacement == 0) ? 0 :
( ( src.IsByteSizeDisp() ) ? 1 : 2 );
// See EmitSibMagic for commenting on SIB encoding.
if( !NeedsSibMagic( src ) )
{
// LEA Land: we have either a one-register encoding or just an offset.
// An offset is encodable as an immediate MOV, and a register as a
// register-to-register MOV.
if( src.Index.IsEmpty() )
{
if( is16bit )
MOV16ItoR( to.Id, src.Displacement );
else
MOV32ItoR( to.Id, src.Displacement );
return;
}
else if( displacement_size == 0 )
{
if( is16bit )
MOV16RtoR( to.Id, src.Index.Id );
else
MOV32RtoR( to.Id, src.Index.Id );
return;
}
else
{
// note: no need to do ebp+0 check since we encode all 0 displacements as
// register assignments above (via MOV)
write8( 0x8d );
ModRM( displacement_size, to.Id, src.Index.Id );
}
}
else
{
if( src.Base.IsEmpty() )
{
if( displacement_size == 0 )
{
// Encode [Index*Scale] as a combination of Mov and Shl.
// This is more efficient because of the bloated format which requires
// a 32 bit displacement.
if( is16bit )
{
MOV16RtoR( to.Id, src.Index.Id );
SHL16ItoR( to.Id, src.Scale );
}
else
{
MOV32RtoR( to.Id, src.Index.Id );
SHL32ItoR( to.Id, src.Scale );
}
return;
}
write8( 0x8d );
ModRM( 0, to.Id, ModRm_UseSib );
SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 );
displacement_size = 2; // force 32bit displacement.
}
else
{
if( src.Base == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
write8( 0x8d );
ModRM( displacement_size, to.Id, ModRm_UseSib );
SibSB( src.Scale, src.Index.Id, src.Base.Id );
}
}
switch( displacement_size )
{
case 0: break;
case 1: write8( src.Displacement ); break;
case 2: write32( src.Displacement ); break;
jNO_DEFAULT
}
}
emitterT void LEA32( x86Register32 to, const ModSib& src )
{
EmitLeaMagic( to, src );
}
emitterT void LEA16( x86Register16 to, const ModSib& src )
{
// fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form?
write8( 0x66 );
EmitLeaMagic( to, src );
}
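With the operator overloads defined earlier in the file, these entry points let address math be written directly. A speculative usage sketch, assuming the x86ModRm operator forms compose as their declarations suggest:

LEA32( eax, ModSib( ebx + ecx*4 + 8 ) ); /* lea eax, [ebx+ecx*4+8]          */
LEA32( edx, ModSib( ecx*3 ) );           /* reduces to lea edx, [ecx+ecx*2] */
LEA32( esi, ModSib( 0x1000 ) );          /* degenerates to mov esi, 0x1000  */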
//////////////////////////////////////////////////////////////////////////////////////////
// Miscellaneous Section!
// Various Instructions with no parameter and no special encoding logic.
//
emitterT void RET() { write8( 0xC3 ); }
emitterT void CBW() { write16( 0x9866 ); }
emitterT void CWD() { write8( 0x98 ); }
emitterT void CDQ() { write8( 0x99 ); }
emitterT void CWDE() { write8( 0x98 ); }
emitterT void LAHF() { write8( 0x9f ); }
emitterT void SAHF() { write8( 0x9e ); }
//////////////////////////////////////////////////////////////////////////////////////////
// Push / Pop Emitters
//
// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu
// is running in. So even though these say push32, they would essentially be push64 on
// an x64 build. Should I rename them accordingly? --air
//
// Note: pushad/popad implementations are intentionally left out. The instructions are
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
emitterT void POP( x86Register32 from )
{
write8( 0x58 | from.Id );
}
emitterT void POP( const ModSib& from )
{
write8( 0x8f ); EmitSibMagic( 0, from );
}
emitterT void PUSH( u32 imm )
{
write8( 0x68 ); write32( imm );
}
emitterT void PUSH( x86Register32 from )
{
write8( 0x50 | from.Id );
}
emitterT void PUSH( const ModSib& from )
{
write8( 0xff ); EmitSibMagic( 6, from );
}
// pushes the EFLAGS register onto the stack
emitterT void PUSHFD() { write8( 0x9C ); }
// pops the EFLAGS register from the stack
emitterT void POPFD() { write8( 0x9D ); }
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,202 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
//------------------------------------------------------------------
// 3DNOW instructions
//------------------------------------------------------------------
/* femms */
emitterT void FEMMS( void )
{
write16( 0x0E0F );
}
emitterT void PFCMPEQMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0xB0 );
}
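This emitter and the ones that follow all share one shape: the 0F 0F escape, a ModRM (plus a disp32 for the memory forms), and the real opcode as a trailing suffix byte. The repetition could be folded into one hypothetical helper:

/* Sketch (not in the source): common 3DNow! reg,reg emitter.
   Suffixes visible below: 0x9E PFADD, 0x9A PFSUB, 0xB4 PFMUL, 0x96 PFRCP. */
static void em3DNowRtoR( x86IntRegType to, x86IntRegType from, u8 suffix )
{
    write16( 0x0F0F );    /* two-byte 3DNow! escape        */
    ModRM( 3, to, from ); /* register-direct operands      */
    write8( suffix );     /* operation selector comes last */
}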
emitterT void PFCMPGTMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0xA0 );
}
emitterT void PFCMPGEMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x90 );
}
emitterT void PFADDMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x9E );
}
emitterT void PFADDRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x9E );
}
emitterT void PFSUBMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x9A );
}
emitterT void PFSUBRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x9A );
}
emitterT void PFMULMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0xB4 );
}
emitterT void PFMULRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0xB4 );
}
emitterT void PFRCPMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x96 );
}
emitterT void PFRCPRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x96 );
}
emitterT void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0xA6 );
}
emitterT void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0xB6 );
}
emitterT void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x97 );
}
emitterT void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0xA7 );
}
emitterT void PF2IDMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x1D );
}
emitterT void PF2IDRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x1D );
}
emitterT void PI2FDMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x0D );
}
emitterT void PI2FDRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x0D );
}
emitterT void PFMAXMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0xA4 );
}
emitterT void PFMAXRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0xA4 );
}
emitterT void PFMINMtoR( x86IntRegType to, uptr from )
{
write16( 0x0F0F );
ModRM( 0, to, DISP32 );
write32( from );
write8( 0x94 );
}
emitterT void PFMINRtoR( x86IntRegType to, x86IntRegType from )
{
write16( 0x0F0F );
ModRM( 3, to, from );
write8( 0x94 );
}

View File

@ -1,201 +0,0 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//------------------------------------------------------------------
// 3DNOW instructions
//------------------------------------------------------------------
/* femms */
emitterT void eFEMMS( void )
{
write16<I>( 0x0E0F );
}
emitterT void ePFCMPEQMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0xB0 );
}
emitterT void ePFCMPGTMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0xA0 );
}
emitterT void ePFCMPGEMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x90 );
}
emitterT void ePFADDMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x9E );
}
emitterT void ePFADDRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x9E );
}
emitterT void ePFSUBMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x9A );
}
emitterT void ePFSUBRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x9A );
}
emitterT void ePFMULMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0xB4 );
}
emitterT void ePFMULRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0xB4 );
}
emitterT void ePFRCPMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x96 );
}
emitterT void ePFRCPRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x96 );
}
emitterT void ePFRCPIT1RtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0xA6 );
}
emitterT void ePFRCPIT2RtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0xB6 );
}
emitterT void ePFRSQRTRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x97 );
}
emitterT void ePFRSQIT1RtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0xA7 );
}
emitterT void ePF2IDMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x1D );
}
emitterT void ePF2IDRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x1D );
}
emitterT void ePI2FDMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x0D );
}
emitterT void ePI2FDRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x0D );
}
emitterT void ePFMAXMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0xA4 );
}
emitterT void ePFMAXRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0xA4 );
}
emitterT void ePFMINMtoR( x86IntRegType to, uptr from )
{
write16<I>( 0x0F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( from );
write8<I>( 0x94 );
}
emitterT void ePFMINRtoR( x86IntRegType to, x86IntRegType from )
{
write16<I>( 0x0F0F );
ModRM<I>( 3, to, from );
write8<I>( 0x94 );
}

View File

@ -18,10 +18,8 @@
#include "PrecompiledHeader.h"
#define _EmitterId_ 0
#include "ix86.h"
#include "Misc.h"
#include "ix86_internal.h"
#include "System.h"
#include "Threading.h"
#include "RedtapeWindows.h"
@ -400,6 +398,7 @@ void cpudetectInit()
cpudetectSSE3(recSSE);
HostSys::Munmap( recSSE, 0x1000 );
}
else { Console::Error("Error: Failed to allocate memory for SSE3 State detection."); }
//////////////////////////////////////
// Core Counting!

pcsx2/x86/ix86/ix86_fpu.cpp
View File

@ -0,0 +1,276 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
//------------------------------------------------------------------
// FPU instructions
//------------------------------------------------------------------
/* fild m32 to fpu reg stack */
emitterT void FILD32( u32 from )
{
write8( 0xDB );
ModRM( 0, 0x0, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fistp m32 from fpu reg stack */
emitterT void FISTP32( u32 from )
{
write8( 0xDB );
ModRM( 0, 0x3, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fld m32 to fpu reg stack */
emitterT void FLD32( u32 from )
{
write8( 0xD9 );
ModRM( 0, 0x0, DISP32 );
write32( MEMADDR(from, 4) );
}
// fld st(i)
emitterT void FLD(int st) { write16(0xc0d9+(st<<8)); }
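// (write16 emits little-endian, so 0xc0d9 produces the byte pair D9 C0; adding
// st<<8 bumps the second byte to C0+st, selecting ST(i). The same trick is used
// by the other two-byte FPU forms in this file.)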
emitterT void FLD1() { write16(0xe8d9); }
emitterT void FLDL2E() { write16(0xead9); }
/* fst m32 from fpu reg stack */
emitterT void FST32( u32 to )
{
write8( 0xD9 );
ModRM( 0, 0x2, DISP32 );
write32( MEMADDR(to, 4) );
}
/* fstp m32 from fpu reg stack */
emitterT void FSTP32( u32 to )
{
write8( 0xD9 );
ModRM( 0, 0x3, DISP32 );
write32( MEMADDR(to, 4) );
}
// fstp st(i)
emitterT void FSTP(int st) { write16(0xd8dd+(st<<8)); }
/* fldcw fpu control word from m16 */
emitterT void FLDCW( u32 from )
{
write8( 0xD9 );
ModRM( 0, 0x5, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fnstcw fpu control word to m16 */
emitterT void FNSTCW( u32 to )
{
write8( 0xD9 );
ModRM( 0, 0x7, DISP32 );
write32( MEMADDR(to, 4) );
}
emitterT void FNSTSWtoAX() { write16(0xE0DF); }
emitterT void FXAM() { write16(0xe5d9); }
emitterT void FDECSTP() { write16(0xf6d9); }
emitterT void FRNDINT() { write16(0xfcd9); }
emitterT void FXCH(int st) { write16(0xc8d9+(st<<8)); }
emitterT void F2XM1() { write16(0xf0d9); }
emitterT void FSCALE() { write16(0xfdd9); }
emitterT void FPATAN(void) { write16(0xf3d9); }
emitterT void FSIN(void) { write16(0xfed9); }
/* fadd ST(src) to fpu reg stack ST(0) */
emitterT void FADD32Rto0( x86IntRegType src )
{
write8( 0xD8 );
write8( 0xC0 + src );
}
/* fadd ST(0) to fpu reg stack ST(src) */
emitterT void FADD320toR( x86IntRegType src )
{
write8( 0xDC );
write8( 0xC0 + src );
}
/* fsub ST(src) to fpu reg stack ST(0) */
emitterT void FSUB32Rto0( x86IntRegType src )
{
write8( 0xD8 );
write8( 0xE0 + src );
}
/* fsub ST(0) to fpu reg stack ST(src) */
emitterT void FSUB320toR( x86IntRegType src )
{
write8( 0xDC );
write8( 0xE8 + src );
}
/* fsubp -> subtract ST(0) from ST(1), store in ST(1) and POP stack */
emitterT void FSUBP( void )
{
write8( 0xDE );
write8( 0xE9 );
}
/* fmul ST(src) to fpu reg stack ST(0) */
emitterT void FMUL32Rto0( x86IntRegType src )
{
write8( 0xD8 );
write8( 0xC8 + src );
}
/* fmul ST(0) to fpu reg stack ST(src) */
emitterT void FMUL320toR( x86IntRegType src )
{
write8( 0xDC );
write8( 0xC8 + src );
}
/* fdiv ST(src) to fpu reg stack ST(0) */
emitterT void FDIV32Rto0( x86IntRegType src )
{
write8( 0xD8 );
write8( 0xF0 + src );
}
/* fdiv ST(0) to fpu reg stack ST(src) */
emitterT void FDIV320toR( x86IntRegType src )
{
write8( 0xDC );
write8( 0xF8 + src );
}
emitterT void FDIV320toRP( x86IntRegType src )
{
write8( 0xDE );
write8( 0xF8 + src );
}
/* fadd m32 to fpu reg stack */
emitterT void FADD32( u32 from )
{
write8( 0xD8 );
ModRM( 0, 0x0, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fsub m32 to fpu reg stack */
emitterT void FSUB32( u32 from )
{
write8( 0xD8 );
ModRM( 0, 0x4, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fmul m32 to fpu reg stack */
emitterT void FMUL32( u32 from )
{
write8( 0xD8 );
ModRM( 0, 0x1, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fdiv m32 to fpu reg stack */
emitterT void FDIV32( u32 from )
{
write8( 0xD8 );
ModRM( 0, 0x6, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fabs fpu reg stack */
emitterT void FABS( void )
{
write16( 0xE1D9 );
}
/* fsqrt fpu reg stack */
emitterT void FSQRT( void )
{
write16( 0xFAD9 );
}
/* fchs fpu reg stack */
emitterT void FCHS( void )
{
write16( 0xE0D9 );
}
/* fcomi st, st(i) */
emitterT void FCOMI( x86IntRegType src )
{
write8( 0xDB );
write8( 0xF0 + src );
}
/* fcomip st, st(i) */
emitterT void FCOMIP( x86IntRegType src )
{
write8( 0xDF );
write8( 0xF0 + src );
}
/* fucomi st, st(i) */
emitterT void FUCOMI( x86IntRegType src )
{
write8( 0xDB );
write8( 0xE8 + src );
}
/* fucomip st, st(i) */
emitterT void FUCOMIP( x86IntRegType src )
{
write8( 0xDF );
write8( 0xE8 + src );
}
/* fcom m32 to fpu reg stack */
emitterT void FCOM32( u32 from )
{
write8( 0xD8 );
ModRM( 0, 0x2, DISP32 );
write32( MEMADDR(from, 4) );
}
/* fcomp m32 to fpu reg stack */
emitterT void FCOMP32( u32 from )
{
write8( 0xD8 );
ModRM( 0, 0x3, DISP32 );
write32( MEMADDR(from, 4) );
}
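// Note: the FCMOV32 macro below deliberately references 'from', which it picks
// up from the parameter of whichever wrapper function it is expanded inside.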
#define FCMOV32( low, high ) \
{ \
write8( low ); \
write8( high + from ); \
}
emitterT void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); }
emitterT void FCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); }
emitterT void FCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); }
emitterT void FCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); }
emitterT void FCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); }
emitterT void FCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); }
emitterT void FCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); }
emitterT void FCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); }


@ -1,276 +0,0 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//#include "PrecompiledHeader.h"
//------------------------------------------------------------------
// FPU instructions
//------------------------------------------------------------------
/* fild m32 to fpu reg stack */
emitterT void eFILD32( u32 from )
{
write8<I>( 0xDB );
ModRM<I>( 0, 0x0, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fistp m32 from fpu reg stack */
emitterT void eFISTP32( u32 from )
{
write8<I>( 0xDB );
ModRM<I>( 0, 0x3, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fld m32 to fpu reg stack */
emitterT void eFLD32( u32 from )
{
write8<I>( 0xD9 );
ModRM<I>( 0, 0x0, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
// fld st(i)
emitterT void eFLD(int st) { write16<I>(0xc0d9+(st<<8)); }
emitterT void eFLD1() { write16<I>(0xe8d9); }
emitterT void eFLDL2E() { write16<I>(0xead9); }
/* fst m32 from fpu reg stack */
emitterT void eFST32( u32 to )
{
write8<I>( 0xD9 );
ModRM<I>( 0, 0x2, DISP32 );
write32<I>( MEMADDR(to, 4) );
}
/* fstp m32 from fpu reg stack */
emitterT void eFSTP32( u32 to )
{
write8<I>( 0xD9 );
ModRM<I>( 0, 0x3, DISP32 );
write32<I>( MEMADDR(to, 4) );
}
// fstp st(i)
emitterT void eFSTP(int st) { write16<I>(0xd8dd+(st<<8)); }
/* fldcw fpu control word from m16 */
emitterT void eFLDCW( u32 from )
{
write8<I>( 0xD9 );
ModRM<I>( 0, 0x5, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fnstcw fpu control word to m16 */
emitterT void eFNSTCW( u32 to )
{
write8<I>( 0xD9 );
ModRM<I>( 0, 0x7, DISP32 );
write32<I>( MEMADDR(to, 4) );
}
emitterT void eFNSTSWtoAX() { write16<I>(0xE0DF); }
emitterT void eFXAM() { write16<I>(0xe5d9); }
emitterT void eFDECSTP() { write16<I>(0xf6d9); }
emitterT void eFRNDINT() { write16<I>(0xfcd9); }
emitterT void eFXCH(int st) { write16<I>(0xc8d9+(st<<8)); }
emitterT void eF2XM1() { write16<I>(0xf0d9); }
emitterT void eFSCALE() { write16<I>(0xfdd9); }
emitterT void eFPATAN(void) { write16<I>(0xf3d9); }
emitterT void eFSIN(void) { write16<I>(0xfed9); }
/* fadd ST(src) to fpu reg stack ST(0) */
emitterT void eFADD32Rto0( x86IntRegType src )
{
write8<I>( 0xD8 );
write8<I>( 0xC0 + src );
}
/* fadd ST(0) to fpu reg stack ST(src) */
emitterT void eFADD320toR( x86IntRegType src )
{
write8<I>( 0xDC );
write8<I>( 0xC0 + src );
}
/* fsub ST(src) to fpu reg stack ST(0) */
emitterT void eFSUB32Rto0( x86IntRegType src )
{
write8<I>( 0xD8 );
write8<I>( 0xE0 + src );
}
/* fsub ST(0) to fpu reg stack ST(src) */
emitterT void eFSUB320toR( x86IntRegType src )
{
write8<I>( 0xDC );
write8<I>( 0xE8 + src );
}
/* fsubp -> subtract ST(0) from ST(1), store in ST(1) and POP stack */
emitterT void eFSUBP( void )
{
write8<I>( 0xDE );
write8<I>( 0xE9 );
}
/* fmul ST(src) to fpu reg stack ST(0) */
emitterT void eFMUL32Rto0( x86IntRegType src )
{
write8<I>( 0xD8 );
write8<I>( 0xC8 + src );
}
/* fmul ST(0) to fpu reg stack ST(src) */
emitterT void eFMUL320toR( x86IntRegType src )
{
write8<I>( 0xDC );
write8<I>( 0xC8 + src );
}
/* fdiv ST(src) to fpu reg stack ST(0) */
emitterT void eFDIV32Rto0( x86IntRegType src )
{
write8<I>( 0xD8 );
write8<I>( 0xF0 + src );
}
/* fdiv ST(0) to fpu reg stack ST(src) */
emitterT void eFDIV320toR( x86IntRegType src )
{
write8<I>( 0xDC );
write8<I>( 0xF8 + src );
}
emitterT void eFDIV320toRP( x86IntRegType src )
{
write8<I>( 0xDE );
write8<I>( 0xF8 + src );
}
/* fadd m32 to fpu reg stack */
emitterT void eFADD32( u32 from )
{
write8<I>( 0xD8 );
ModRM<I>( 0, 0x0, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fsub m32 to fpu reg stack */
emitterT void eFSUB32( u32 from )
{
write8<I>( 0xD8 );
ModRM<I>( 0, 0x4, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fmul m32 to fpu reg stack */
emitterT void eFMUL32( u32 from )
{
write8<I>( 0xD8 );
ModRM<I>( 0, 0x1, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fdiv m32 to fpu reg stack */
emitterT void eFDIV32( u32 from )
{
write8<I>( 0xD8 );
ModRM<I>( 0, 0x6, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fabs fpu reg stack */
emitterT void eFABS( void )
{
write16<I>( 0xE1D9 );
}
/* fsqrt fpu reg stack */
emitterT void eFSQRT( void )
{
write16<I>( 0xFAD9 );
}
/* fchs fpu reg stack */
emitterT void eFCHS( void )
{
write16<I>( 0xE0D9 );
}
/* fcomi st, st(i) */
emitterT void eFCOMI( x86IntRegType src )
{
write8<I>( 0xDB );
write8<I>( 0xF0 + src );
}
/* fcomip st, st(i) */
emitterT void eFCOMIP( x86IntRegType src )
{
write8<I>( 0xDF );
write8<I>( 0xF0 + src );
}
/* fucomi st, st(i) */
emitterT void eFUCOMI( x86IntRegType src )
{
write8<I>( 0xDB );
write8<I>( 0xE8 + src );
}
/* fucomip st, st(i) */
emitterT void eFUCOMIP( x86IntRegType src )
{
write8<I>( 0xDF );
write8<I>( 0xE8 + src );
}
/* fcom m32 to fpu reg stack */
emitterT void eFCOM32( u32 from )
{
write8<I>( 0xD8 );
ModRM<I>( 0, 0x2, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* fcomp m32 to fpu reg stack */
emitterT void eFCOMP32( u32 from )
{
write8<I>( 0xD8 );
ModRM<I>( 0, 0x3, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
#define FCMOV32( low, high ) \
{ \
write8<I>( low ); \
write8<I>( high + from ); \
}
emitterT void eFCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); }
emitterT void eFCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); }
emitterT void eFCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); }
emitterT void eFCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); }
emitterT void eFCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); }
emitterT void eFCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); }
emitterT void eFCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); }
emitterT void eFCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); }


@ -0,0 +1,225 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
//------------------------------------------------------------------
// x86 Group 1 Instructions
//------------------------------------------------------------------
// Group 1 instructions all adhere to the same encoding scheme, and so they all
// share the same emitter which has been coded here.
//
// Group 1 Table: [column value is the Reg field of the ModRM byte]
//
// 0 1 2 3 4 5 6 7
// ADD OR ADC SBB AND SUB XOR CMP
//
namespace x86Emitter {
//////////////////////////////////////////////////////////////////////////////////////////
// x86RegConverter - this class is used internally by the emitter as a helper for
// converting 8- and 16-bit register forms into 32-bit forms.  This way the end-user exposed API
// can use type-safe 8/16/32 bit register types, and the underlying code can use a single
// unified emitter to generate all function variations + prefixes and such. :)
//
class x86RegConverter : public x86Register32
{
public:
x86RegConverter( x86Register32 src ) : x86Register32( src ) {}
x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {}
x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {}
};
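// Usage sketch (hypothetical, assuming the usual register instances such as
// ecx/cx/cl are declared elsewhere in the emitter headers): all three widths
// funnel into the same Group1() body --
//   Group1( G1Type_ADD, x86RegConverter(ecx), x86RegConverter(edx) );       // 32-bit
//   Group1( G1Type_ADD, x86RegConverter(cx),  x86RegConverter(dx) );        // 16-bit (0x66 prefix emitted separately by the caller)
//   Group1( G1Type_ADD, x86RegConverter(cl),  x86RegConverter(dl), true );  // 8-bit form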
enum Group1InstructionType
{
G1Type_ADD=0,
G1Type_OR,
G1Type_ADC,
G1Type_SBB,
G1Type_AND,
G1Type_SUB,
G1Type_XOR,
G1Type_CMP
};
static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false )
{
write8( (bit8form ? 0 : 1) | (inst<<3) );
ModRM( 3, from.Id, to.Id );
}
static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false )
{
write8( (bit8form ? 0 : 1) | (inst<<3) );
EmitSibMagic( from, sibdest );
}
static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false )
{
write8( (bit8form ? 2 : 3) | (inst<<3) );
EmitSibMagic( to, sibsrc );
}
// Note: this function emits based on the operand size of imm, so a u16 imm
// generates a 16-bit instruction form (the 0x66 prefix is added by the caller).
template< typename T >
static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm )
{
bool bit8form = (sizeof(T) == 1);
if( !bit8form && is_s8( imm ) )
{
write8( 0x83 );
ModRM( 3, inst, to.Id );
write8( (s8)imm );
}
else
{
if( to == eax )
write8( (bit8form ? 4 : 5) | (inst<<3) );
else
{
write8( bit8form ? 0x80 : 0x81 );
ModRM( 3, inst, to.Id );
}
x86write<T>( imm );
}
}
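// Illustrative byte sequences (assuming the ADD32 forms generated by the macro
// further below):
//   ADD32( ecx, 4 )          -> 83 C1 04           (sign-extended imm8 path)
//   ADD32( ecx, 0x12345678 ) -> 81 C1 78 56 34 12  (full imm32 form)
//   ADD32( eax, 0x12345678 ) -> 05 78 56 34 12     (short accumulator form)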
// Note: this function emits based on the operand size of imm, so a u16 imm
// generates a 16-bit instruction form (the 0x66 prefix is added by the caller).
template< typename T >
static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm )
{
bool bit8form = (sizeof(T) == 1);
write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) );
EmitSibMagic( inst, sibdest );
if( !bit8form && is_s8( imm ) )
write8( (s8)imm );
else
x86write<T>( imm );
}
// 16 bit instruction prefix!
static __forceinline void prefix16() { write8(0x66); }
//////////////////////////////////////////////////////////////////////////////////////////
//
#define DEFINE_GROUP1_OPCODE( cod ) \
emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \
emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \
emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \
emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \
emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \
emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
\
emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \
emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \
emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \
emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \
\
emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \
emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \
emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \
emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \
emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \
emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); }
DEFINE_GROUP1_OPCODE( ADD )
DEFINE_GROUP1_OPCODE( CMP )
DEFINE_GROUP1_OPCODE( OR )
DEFINE_GROUP1_OPCODE( ADC )
DEFINE_GROUP1_OPCODE( SBB )
DEFINE_GROUP1_OPCODE( AND )
DEFINE_GROUP1_OPCODE( SUB )
DEFINE_GROUP1_OPCODE( XOR )
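// The eight invocations above expand to the full type-safe API surface;
// hypothetical call sites (register instances assumed declared elsewhere):
//   ADD32( ecx, edx );     // reg, reg
//   XOR32( eax, eax );     // the classic register-zeroing idiom
//   AND8( cl, (u8)0x0f );  // 8-bit immediate form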
} // end namespace x86Emitter
static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src )
{
return x86Emitter::x86Register32( src );
}
static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src )
{
return x86Emitter::x86Register16( src );
}
static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src )
{
return x86Emitter::x86Register8( src );
}
static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src )
{
return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
#define DEFINE_LEGACY_HELPER( cod, bits ) \
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); }
#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \
DEFINE_LEGACY_HELPER( cod, 32 ) \
DEFINE_LEGACY_HELPER( cod, 16 ) \
DEFINE_LEGACY_HELPER( cod, 8 )
DEFINE_GROUP1_OPCODE_LEGACY( ADD )
DEFINE_GROUP1_OPCODE_LEGACY( CMP )
DEFINE_GROUP1_OPCODE_LEGACY( OR )
DEFINE_GROUP1_OPCODE_LEGACY( ADC )
DEFINE_GROUP1_OPCODE_LEGACY( SBB )
DEFINE_GROUP1_OPCODE_LEGACY( AND )
DEFINE_GROUP1_OPCODE_LEGACY( SUB )
DEFINE_GROUP1_OPCODE_LEGACY( XOR )
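// e.g. the legacy call ADD32ItoR( ECX, 4 ) now simply forwards to the type-safe
// x86Emitter::ADD32( x86Register32(ECX), 4 ) defined above.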
// Special forms needed by the legacy emitter syntax:
emitterT void AND32I8toR( x86IntRegType to, s8 from )
{
x86Emitter::AND32( _reghlp32(to), from );
}
emitterT void AND32I8toM( uptr to, s8 from )
{
x86Emitter::AND32( (void*)to, from );
}


@ -0,0 +1,43 @@
#pragma once
#include "ix86.h"
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define MEMADDR(addr, oplen) (addr)
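// (On 32-bit targets the address is used verbatim and oplen is ignored; the
// parameter presumably exists so a RIP-relative x86-64 form could compute
// addr - rip - oplen without touching every call site.)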
#define Rex(w,r,x,b) assert(0)
#define RexR(w, reg) assert( !(w || (reg)>=8) )
#define RexB(w, base) assert( !(w || (base)>=8) )
#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) )
#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) )
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask))
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5;	// same index value as EBP (used in RM field when Mod is 0)
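// i.e. in a ModRM byte with mod != 3, rm == 4 means "a SIB byte follows", and
// with mod == 0, rm == 5 means "no base register; a 32-bit displacement follows".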
//------------------------------------------------------------------
// General Emitter Helper functions
//------------------------------------------------------------------
namespace x86Emitter
{
extern void EmitSibMagic( int regfield, const ModSib& info );
extern void EmitSibMagic( x86Register32 regfield, const ModSib& info );
extern bool NeedsSibMagic( const ModSib& info );
}
// From here out are the legacy (old) emitter functions...
extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
extern void ModRM( int mod, int reg, int rm );
extern void SibSB( int ss, int index, int base );
extern void SET8R( int cc, int to );
extern u8* J8Rel( int cc, int to );
extern u32* J32Rel( int cc, u32 to );
extern u64 GetCPUTick( void );
//------------------------------------------------------------------

File diff suppressed because it is too large


@ -51,66 +51,51 @@
//------------------------------------------------------------------
// mov instructions
//------------------------------------------------------------------
#define MOV64RtoR eMOV64RtoR<_EmitterId_>
#define MOV64RtoM eMOV64RtoM<_EmitterId_>
#define MOV64MtoR eMOV64MtoR<_EmitterId_>
#define MOV64I32toM eMOV64I32toM<_EmitterId_>
#define MOV64I32toR eMOV64I32toR<_EmitterId_>
#define MOV64ItoR eMOV64ItoR<_EmitterId_>
#define MOV64ItoRmOffset eMOV64ItoRmOffset<_EmitterId_>
#define MOV64RmOffsettoR eMOV64RmOffsettoR<_EmitterId_>
#define MOV64RmStoR eMOV64RmStoR<_EmitterId_>
#define MOV64RtoRmOffset eMOV64RtoRmOffset<_EmitterId_>
#define MOV64RtoRmS eMOV64RtoRmS<_EmitterId_>
#define MOV32RtoR eMOV32RtoR<_EmitterId_>
#define MOV32RtoM eMOV32RtoM<_EmitterId_>
#define MOV32MtoR eMOV32MtoR<_EmitterId_>
#define MOV32RmtoR eMOV32RmtoR<_EmitterId_>
#define MOV32RmtoROffset eMOV32RmtoROffset<_EmitterId_>
#define MOV32RmtoR eMOV32RmtoR<_EmitterId_>
#define MOV32RmStoR eMOV32RmStoR<_EmitterId_>
#define MOV32RmSOffsettoR eMOV32RmSOffsettoR<_EmitterId_>
#define MOV32RtoRm eMOV32RtoRm<_EmitterId_>
#define MOV32RtoRmS eMOV32RtoRmS<_EmitterId_>
#define MOV32ItoR eMOV32ItoR<_EmitterId_>
#define MOV32ItoM eMOV32ItoM<_EmitterId_>
#define MOV32ItoRmOffset eMOV32ItoRmOffset<_EmitterId_>
#define MOV32RtoRmOffset eMOV32RtoRmOffset<_EmitterId_>
#define MOV32ItoRm eMOV32ItoRm<_EmitterId_>
#define MOV32RtoRm eMOV32RtoRm<_EmitterId_>
#define MOV16RtoM eMOV16RtoM<_EmitterId_>
#define MOV16MtoR eMOV16MtoR<_EmitterId_>
#define MOV16RmtoR eMOV16RmtoR<_EmitterId_>
#define MOV16RmtoROffset eMOV16RmtoROffset<_EmitterId_>
#define MOV16RmtoR eMOV16RmtoR<_EmitterId_>
#define MOV16RmSOffsettoR eMOV16RmSOffsettoR<_EmitterId_>
#define MOV16RtoRm eMOV16RtoRm<_EmitterId_>
#define MOV16ItoM eMOV16ItoM<_EmitterId_>
#define MOV16RtoRmS eMOV16RtoRmS<_EmitterId_>
#define MOV16ItoR eMOV16ItoR<_EmitterId_>
#define MOV16ItoRmOffset eMOV16ItoRmOffset<_EmitterId_>
#define MOV16RtoRmOffset eMOV16RtoRmOffset<_EmitterId_>
#define MOV16ItoRm eMOV16ItoRm<_EmitterId_>
#define MOV16RtoRm eMOV16RtoRm<_EmitterId_>
#define MOV8RtoM eMOV8RtoM<_EmitterId_>
#define MOV8MtoR eMOV8MtoR<_EmitterId_>
#define MOV8RmtoR eMOV8RmtoR<_EmitterId_>
#define MOV8RmtoROffset eMOV8RmtoROffset<_EmitterId_>
#define MOV8RmtoR eMOV8RmtoR<_EmitterId_>
#define MOV8RmSOffsettoR eMOV8RmSOffsettoR<_EmitterId_>
#define MOV8RtoRm eMOV8RtoRm<_EmitterId_>
#define MOV8ItoM eMOV8ItoM<_EmitterId_>
#define MOV8ItoR eMOV8ItoR<_EmitterId_>
#define MOV8ItoRmOffset eMOV8ItoRmOffset<_EmitterId_>
#define MOV8RtoRmOffset eMOV8RtoRmOffset<_EmitterId_>
#define MOV8ItoRm eMOV8ItoRm<_EmitterId_>
#define MOV8RtoRm eMOV8RtoRm<_EmitterId_>
#define MOVSX32R8toR eMOVSX32R8toR<_EmitterId_>
#define MOVSX32Rm8toR eMOVSX32Rm8toR<_EmitterId_>
#define MOVSX32Rm8toROffset eMOVSX32Rm8toROffset<_EmitterId_>
#define MOVSX32M8toR eMOVSX32M8toR<_EmitterId_>
#define MOVSX32R16toR eMOVSX32R16toR<_EmitterId_>
#define MOVSX32Rm16toR eMOVSX32Rm16toR<_EmitterId_>
#define MOVSX32Rm16toROffset eMOVSX32Rm16toROffset<_EmitterId_>
#define MOVSX32M16toR eMOVSX32M16toR<_EmitterId_>
#define MOVZX32R8toR eMOVZX32R8toR<_EmitterId_>
#define MOVZX32Rm8toR eMOVZX32Rm8toR<_EmitterId_>
#define MOVZX32Rm8toROffset eMOVZX32Rm8toROffset<_EmitterId_>
#define MOVZX32M8toR eMOVZX32M8toR<_EmitterId_>
#define MOVZX32R16toR eMOVZX32R16toR<_EmitterId_>
#define MOVZX32Rm16toR eMOVZX32Rm16toR<_EmitterId_>
#define MOVZX32Rm16toROffset eMOVZX32Rm16toROffset<_EmitterId_>
#define MOVZX32M16toR eMOVZX32M16toR<_EmitterId_>
#define CMOVBE32RtoR eCMOVBE32RtoR<_EmitterId_>
#define CMOVBE32MtoR eCMOVBE32MtoR<_EmitterId_>
@ -147,12 +132,10 @@
//------------------------------------------------------------------
// arithmetic instructions
//------------------------------------------------------------------
#define ADD64ItoR eADD64ItoR<_EmitterId_>
#define ADD64MtoR eADD64MtoR<_EmitterId_>
#define ADD32ItoEAX eADD32ItoEAX<_EmitterId_>
#define ADD32ItoR eADD32ItoR<_EmitterId_>
#define ADD32ItoM eADD32ItoM<_EmitterId_>
#define ADD32ItoRmOffset eADD32ItoRmOffset<_EmitterId_>
#define ADD32ItoRm eADD32ItoRm<_EmitterId_>
#define ADD32RtoR eADD32RtoR<_EmitterId_>
#define ADD32RtoM eADD32RtoM<_EmitterId_>
#define ADD32MtoR eADD32MtoR<_EmitterId_>
@ -171,7 +154,6 @@
#define INC32M eINC32M<_EmitterId_>
#define INC16R eINC16R<_EmitterId_>
#define INC16M eINC16M<_EmitterId_>
#define SUB64MtoR eSUB64MtoR<_EmitterId_>
#define SUB32ItoR eSUB32ItoR<_EmitterId_>
#define SUB32ItoM eSUB32ItoM<_EmitterId_>
#define SUB32RtoR eSUB32RtoR<_EmitterId_>
@ -181,7 +163,6 @@
#define SUB16ItoR eSUB16ItoR<_EmitterId_>
#define SUB16ItoM eSUB16ItoM<_EmitterId_>
#define SUB16MtoR eSUB16MtoR<_EmitterId_>
#define SBB64RtoR eSBB64RtoR<_EmitterId_>
#define SBB32ItoR eSBB32ItoR<_EmitterId_>
#define SBB32ItoM eSBB32ItoM<_EmitterId_>
#define SBB32RtoR eSBB32RtoR<_EmitterId_>
@ -203,12 +184,6 @@
//------------------------------------------------------------------
// shifting instructions
//------------------------------------------------------------------
#define SHL64ItoR eSHL64ItoR<_EmitterId_>
#define SHL64CLtoR eSHL64CLtoR<_EmitterId_>
#define SHR64ItoR eSHR64ItoR<_EmitterId_>
#define SHR64CLtoR eSHR64CLtoR<_EmitterId_>
#define SAR64ItoR eSAR64ItoR<_EmitterId_>
#define SAR64CLtoR eSAR64CLtoR<_EmitterId_>
#define SHL32ItoR eSHL32ItoR<_EmitterId_>
#define SHL32ItoM eSHL32ItoM<_EmitterId_>
#define SHL32CLtoR eSHL32CLtoR<_EmitterId_>
@ -231,10 +206,6 @@
//------------------------------------------------------------------
// logical instructions
//------------------------------------------------------------------
#define OR64ItoR eOR64ItoR<_EmitterId_>
#define OR64MtoR eOR64MtoR<_EmitterId_>
#define OR64RtoR eOR64RtoR<_EmitterId_>
#define OR64RtoM eOR64RtoM<_EmitterId_>
#define OR32ItoR eOR32ItoR<_EmitterId_>
#define OR32ItoM eOR32ItoM<_EmitterId_>
#define OR32RtoR eOR32RtoR<_EmitterId_>
@ -249,11 +220,6 @@
#define OR8RtoM eOR8RtoM<_EmitterId_>
#define OR8ItoM eOR8ItoM<_EmitterId_>
#define OR8MtoR eOR8MtoR<_EmitterId_>
#define XOR64ItoR eXOR64ItoR<_EmitterId_>
#define XOR64RtoR eXOR64RtoR<_EmitterId_>
#define XOR64MtoR eXOR64MtoR<_EmitterId_>
#define XOR64RtoR eXOR64RtoR<_EmitterId_>
#define XOR64RtoM eXOR64RtoM<_EmitterId_>
#define XOR32ItoR eXOR32ItoR<_EmitterId_>
#define XOR32ItoM eXOR32ItoM<_EmitterId_>
#define XOR32RtoR eXOR32RtoR<_EmitterId_>
@ -262,11 +228,6 @@
#define XOR32MtoR eXOR32MtoR<_EmitterId_>
#define XOR16RtoM eXOR16RtoM<_EmitterId_>
#define XOR16ItoR eXOR16ItoR<_EmitterId_>
#define AND64I32toR eAND64I32toR<_EmitterId_>
#define AND64MtoR eAND64MtoR<_EmitterId_>
#define AND64RtoM eAND64RtoM<_EmitterId_>
#define AND64RtoR eAND64RtoR<_EmitterId_>
#define AND64I32toM eAND64I32toM<_EmitterId_>
#define AND32ItoR eAND32ItoR<_EmitterId_>
#define AND32I8toR eAND32I8toR<_EmitterId_>
#define AND32ItoM eAND32ItoM<_EmitterId_>
@ -275,7 +236,7 @@
#define AND32RtoM eAND32RtoM<_EmitterId_>
#define AND32MtoR eAND32MtoR<_EmitterId_>
#define AND32RmtoR eAND32RmtoR<_EmitterId_>
#define AND32RmtoROffset eAND32RmtoROffset<_EmitterId_>
#define AND32RmtoR eAND32RmtoR<_EmitterId_>
#define AND16RtoR eAND16RtoR<_EmitterId_>
#define AND16ItoR eAND16ItoR<_EmitterId_>
#define AND16ItoM eAND16ItoM<_EmitterId_>
@ -286,11 +247,8 @@
#define AND8RtoM eAND8RtoM<_EmitterId_>
#define AND8MtoR eAND8MtoR<_EmitterId_>
#define AND8RtoR eAND8RtoR<_EmitterId_>
#define BTS32MtoR eBTS32MtoR<_EmitterId_>
#define NOT64R eNOT64R<_EmitterId_>
#define NOT32R eNOT32R<_EmitterId_>
#define NOT32M eNOT32M<_EmitterId_>
#define NEG64R eNEG64R<_EmitterId_>
#define NEG32R eNEG32R<_EmitterId_>
#define NEG32M eNEG32M<_EmitterId_>
#define NEG16R eNEG16R<_EmitterId_>
@ -350,15 +308,13 @@
//------------------------------------------------------------------
// misc instructions
//------------------------------------------------------------------
#define CMP64I32toR eCMP64I32toR<_EmitterId_>
#define CMP64MtoR eCMP64MtoR<_EmitterId_>
#define CMP64RtoR eCMP64RtoR<_EmitterId_>
#define CMP32ItoR eCMP32ItoR<_EmitterId_>
#define CMP32ItoM eCMP32ItoM<_EmitterId_>
#define CMP32RtoR eCMP32RtoR<_EmitterId_>
#define CMP32MtoR eCMP32MtoR<_EmitterId_>
#define CMP32ItoRm eCMP32ItoRm<_EmitterId_>
#define CMP8I8toRm eCMP8I8toRm<_EmitterId_>
#define CMP32I8toRm eCMP32I8toRm<_EmitterId_>
#define CMP32I8toRmOffset8 eCMP32I8toRmOffset8<_EmitterId_>
#define CMP32I8toM eCMP32I8toM<_EmitterId_>
#define CMP16ItoR eCMP16ItoR<_EmitterId_>
#define CMP16ItoM eCMP16ItoM<_EmitterId_>
@ -540,16 +496,16 @@
#define PUNPCKHDQMtoR ePUNPCKHDQMtoR<_EmitterId_>
#define MOVQ64ItoR eMOVQ64ItoR<_EmitterId_>
#define MOVQRtoR eMOVQRtoR<_EmitterId_>
#define MOVQRmtoROffset eMOVQRmtoROffset<_EmitterId_>
#define MOVQRtoRmOffset eMOVQRtoRmOffset<_EmitterId_>
#define MOVQRmtoR eMOVQRmtoR<_EmitterId_>
#define MOVQRtoRm eMOVQRtoRm<_EmitterId_>
#define MOVDMtoMMX eMOVDMtoMMX<_EmitterId_>
#define MOVDMMXtoM eMOVDMMXtoM<_EmitterId_>
#define MOVD32RtoMMX eMOVD32RtoMMX<_EmitterId_>
#define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_>
#define MOVD32RmOffsettoMMX eMOVD32RmOffsettoMMX<_EmitterId_>
#define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_>
#define MOVD32MMXtoR eMOVD32MMXtoR<_EmitterId_>
#define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_>
#define MOVD32MMXtoRmOffset eMOVD32MMXtoRmOffset<_EmitterId_>
#define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_>
#define PINSRWRtoMMX ePINSRWRtoMMX<_EmitterId_>
#define PSHUFWRtoR ePSHUFWRtoR<_EmitterId_>
#define PSHUFWMtoR ePSHUFWMtoR<_EmitterId_>
@ -576,33 +532,31 @@
#define SSE_MOVSS_XMM_to_M32 eSSE_MOVSS_XMM_to_M32<_EmitterId_>
#define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_>
#define SSE_MOVSS_XMM_to_XMM eSSE_MOVSS_XMM_to_XMM<_EmitterId_>
#define SSE_MOVSS_RmOffset_to_XMM eSSE_MOVSS_RmOffset_to_XMM<_EmitterId_>
#define SSE_MOVSS_XMM_to_RmOffset eSSE_MOVSS_XMM_to_RmOffset<_EmitterId_>
#define SSE_MOVSS_Rm_to_XMM eSSE_MOVSS_Rm_to_XMM<_EmitterId_>
#define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_>
#define SSE_MASKMOVDQU_XMM_to_XMM eSSE_MASKMOVDQU_XMM_to_XMM<_EmitterId_>
#define SSE_MOVLPS_M64_to_XMM eSSE_MOVLPS_M64_to_XMM<_EmitterId_>
#define SSE_MOVLPS_XMM_to_M64 eSSE_MOVLPS_XMM_to_M64<_EmitterId_>
#define SSE_MOVLPS_RmOffset_to_XMM eSSE_MOVLPS_RmOffset_to_XMM<_EmitterId_>
#define SSE_MOVLPS_XMM_to_RmOffset eSSE_MOVLPS_XMM_to_RmOffset<_EmitterId_>
#define SSE_MOVLPS_Rm_to_XMM eSSE_MOVLPS_Rm_to_XMM<_EmitterId_>
#define SSE_MOVLPS_XMM_to_Rm eSSE_MOVLPS_XMM_to_Rm<_EmitterId_>
#define SSE_MOVHPS_M64_to_XMM eSSE_MOVHPS_M64_to_XMM<_EmitterId_>
#define SSE_MOVHPS_XMM_to_M64 eSSE_MOVHPS_XMM_to_M64<_EmitterId_>
#define SSE_MOVHPS_RmOffset_to_XMM eSSE_MOVHPS_RmOffset_to_XMM<_EmitterId_>
#define SSE_MOVHPS_XMM_to_RmOffset eSSE_MOVHPS_XMM_to_RmOffset<_EmitterId_>
#define SSE_MOVHPS_Rm_to_XMM eSSE_MOVHPS_Rm_to_XMM<_EmitterId_>
#define SSE_MOVHPS_XMM_to_Rm eSSE_MOVHPS_XMM_to_Rm<_EmitterId_>
#define SSE_MOVLHPS_XMM_to_XMM eSSE_MOVLHPS_XMM_to_XMM<_EmitterId_>
#define SSE_MOVHLPS_XMM_to_XMM eSSE_MOVHLPS_XMM_to_XMM<_EmitterId_>
#define SSE_MOVLPSRmtoR eSSE_MOVLPSRmtoR<_EmitterId_>
#define SSE_MOVLPSRmtoROffset eSSE_MOVLPSRmtoROffset<_EmitterId_>
#define SSE_MOVLPSRtoRm eSSE_MOVLPSRtoRm<_EmitterId_>
#define SSE_MOVLPSRtoRmOffset eSSE_MOVLPSRtoRmOffset<_EmitterId_>
#define SSE_MOVAPSRmStoR eSSE_MOVAPSRmStoR<_EmitterId_>
#define SSE_MOVAPSRtoRmS eSSE_MOVAPSRtoRmS<_EmitterId_>
#define SSE_MOVAPSRtoRmOffset eSSE_MOVAPSRtoRmOffset<_EmitterId_>
#define SSE_MOVAPSRmtoROffset eSSE_MOVAPSRmtoROffset<_EmitterId_>
#define SSE_MOVAPSRtoRm eSSE_MOVAPSRtoRm<_EmitterId_>
#define SSE_MOVAPSRmtoR eSSE_MOVAPSRmtoR<_EmitterId_>
#define SSE_MOVUPSRmStoR eSSE_MOVUPSRmStoR<_EmitterId_>
#define SSE_MOVUPSRtoRmS eSSE_MOVUPSRtoRmS<_EmitterId_>
#define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_>
#define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_>
#define SSE_MOVUPSRmtoROffset eSSE_MOVUPSRmtoROffset<_EmitterId_>
#define SSE_MOVUPSRtoRmOffset eSSE_MOVUPSRtoRmOffset<_EmitterId_>
#define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_>
#define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_>
#define SSE_RCPPS_XMM_to_XMM eSSE_RCPPS_XMM_to_XMM<_EmitterId_>
#define SSE_RCPPS_M128_to_XMM eSSE_RCPPS_M128_to_XMM<_EmitterId_>
#define SSE_RCPSS_XMM_to_XMM eSSE_RCPSS_XMM_to_XMM<_EmitterId_>
@ -677,7 +631,7 @@
#define SSE_UNPCKHPS_XMM_to_XMM eSSE_UNPCKHPS_XMM_to_XMM<_EmitterId_>
#define SSE_SHUFPS_XMM_to_XMM eSSE_SHUFPS_XMM_to_XMM<_EmitterId_>
#define SSE_SHUFPS_M128_to_XMM eSSE_SHUFPS_M128_to_XMM<_EmitterId_>
#define SSE_SHUFPS_RmOffset_to_XMM eSSE_SHUFPS_RmOffset_to_XMM<_EmitterId_>
#define SSE_SHUFPS_Rm_to_XMM eSSE_SHUFPS_Rm_to_XMM<_EmitterId_>
#define SSE_CMPEQPS_M128_to_XMM eSSE_CMPEQPS_M128_to_XMM<_EmitterId_>
#define SSE_CMPEQPS_XMM_to_XMM eSSE_CMPEQPS_XMM_to_XMM<_EmitterId_>
#define SSE_CMPLTPS_M128_to_XMM eSSE_CMPLTPS_M128_to_XMM<_EmitterId_>
@ -781,8 +735,8 @@
#define SSE2_MOVQ_XMM_to_M64 eSSE2_MOVQ_XMM_to_M64<_EmitterId_>
#define SSE2_MOVDQ2Q_XMM_to_MM eSSE2_MOVDQ2Q_XMM_to_MM<_EmitterId_>
#define SSE2_MOVQ2DQ_MM_to_XMM eSSE2_MOVQ2DQ_MM_to_XMM<_EmitterId_>
#define SSE2_MOVDQARtoRmOffset eSSE2_MOVDQARtoRmOffset<_EmitterId_>
#define SSE2_MOVDQARmtoROffset eSSE2_MOVDQARmtoROffset<_EmitterId_>
#define SSE2_MOVDQARtoRm eSSE2_MOVDQARtoRm<_EmitterId_>
#define SSE2_MOVDQARmtoR eSSE2_MOVDQARmtoR<_EmitterId_>
#define SSE2_CVTDQ2PS_M128_to_XMM eSSE2_CVTDQ2PS_M128_to_XMM<_EmitterId_>
#define SSE2_CVTDQ2PS_XMM_to_XMM eSSE2_CVTDQ2PS_XMM_to_XMM<_EmitterId_>
#define SSE2_CVTPS2DQ_M128_to_XMM eSSE2_CVTPS2DQ_M128_to_XMM<_EmitterId_>
@ -921,11 +875,11 @@
#define SSE2_MOVD_M32_to_XMM eSSE2_MOVD_M32_to_XMM<_EmitterId_>
#define SSE2_MOVD_R_to_XMM eSSE2_MOVD_R_to_XMM<_EmitterId_>
#define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_>
#define SSE2_MOVD_RmOffset_to_XMM eSSE2_MOVD_RmOffset_to_XMM<_EmitterId_>
#define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_>
#define SSE2_MOVD_XMM_to_M32 eSSE2_MOVD_XMM_to_M32<_EmitterId_>
#define SSE2_MOVD_XMM_to_R eSSE2_MOVD_XMM_to_R<_EmitterId_>
#define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_>
#define SSE2_MOVD_XMM_to_RmOffset eSSE2_MOVD_XMM_to_RmOffset<_EmitterId_>
#define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_>
#define SSE2_MOVQ_XMM_to_R eSSE2_MOVQ_XMM_to_R<_EmitterId_>
#define SSE2_MOVQ_R_to_XMM eSSE2_MOVQ_R_to_XMM<_EmitterId_>
//------------------------------------------------------------------

pcsx2/x86/ix86/ix86_mmx.cpp Normal file

@ -0,0 +1,584 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
//------------------------------------------------------------------
// MMX instructions
//
// note: r64 = mm
//------------------------------------------------------------------
/* movq m64 to r64 */
emitterT void MOVQMtoR( x86MMXRegType to, uptr from )
{
write16( 0x6F0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* movq r64 to m64 */
emitterT void MOVQRtoM( uptr to, x86MMXRegType from )
{
write16( 0x7F0F );
ModRM( 0, from, DISP32 );
write32(MEMADDR(to, 4));
}
/* pand r64 to r64 */
emitterT void PANDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xDB0F );
ModRM( 3, to, from );
}
emitterT void PANDNRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xDF0F );
ModRM( 3, to, from );
}
/* por r64 to r64 */
emitterT void PORRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xEB0F );
ModRM( 3, to, from );
}
/* pxor r64 to r64 */
emitterT void PXORRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xEF0F );
ModRM( 3, to, from );
}
/* psllq r64 to r64 */
emitterT void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xF30F );
ModRM( 3, to, from );
}
/* psllq m64 to r64 */
emitterT void PSLLQMtoR( x86MMXRegType to, uptr from )
{
write16( 0xF30F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* psllq imm8 to r64 */
emitterT void PSLLQItoR( x86MMXRegType to, u8 from )
{
write16( 0x730F );
ModRM( 3, 6, to);
write8( from );
}
/* psrlq r64 to r64 */
emitterT void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xD30F );
ModRM( 3, to, from );
}
/* psrlq m64 to r64 */
emitterT void PSRLQMtoR( x86MMXRegType to, uptr from )
{
write16( 0xD30F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* psrlq imm8 to r64 */
emitterT void PSRLQItoR( x86MMXRegType to, u8 from )
{
write16( 0x730F );
ModRM( 3, 2, to);
write8( from );
}
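// For the MMX shift-by-immediate group (0F 71/72/73), the ModRM reg field picks
// the operation: /2 = logical shift right, /4 = arithmetic shift right (PSRAW/D
// below), /6 = shift left -- which is why ModRM() is passed 2 or 6 above.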
/* paddusb r64 to r64 */
emitterT void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xDC0F );
ModRM( 3, to, from );
}
/* paddusb m64 to r64 */
emitterT void PADDUSBMtoR( x86MMXRegType to, uptr from )
{
write16( 0xDC0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* paddusw r64 to r64 */
emitterT void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xDD0F );
ModRM( 3, to, from );
}
/* paddusw m64 to r64 */
emitterT void PADDUSWMtoR( x86MMXRegType to, uptr from )
{
write16( 0xDD0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* paddb r64 to r64 */
emitterT void PADDBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xFC0F );
ModRM( 3, to, from );
}
/* paddb m64 to r64 */
emitterT void PADDBMtoR( x86MMXRegType to, uptr from )
{
write16( 0xFC0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* paddw r64 to r64 */
emitterT void PADDWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xFD0F );
ModRM( 3, to, from );
}
/* paddw m64 to r64 */
emitterT void PADDWMtoR( x86MMXRegType to, uptr from )
{
write16( 0xFD0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* paddd r64 to r64 */
emitterT void PADDDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xFE0F );
ModRM( 3, to, from );
}
/* paddd m64 to r64 */
emitterT void PADDDMtoR( x86MMXRegType to, uptr from )
{
write16( 0xFE0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* emms */
emitterT void EMMS()
{
write16( 0x770F );
}
emitterT void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xEC0F );
ModRM( 3, to, from );
}
emitterT void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xED0F );
ModRM( 3, to, from );
}
// paddq m64 to r64 (SSE2)
emitterT void PADDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0xD40F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
// paddq r64 to r64 (SSE2)
emitterT void PADDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xD40F );
ModRM( 3, to, from );
}
emitterT void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xE80F );
ModRM( 3, to, from );
}
emitterT void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xE90F );
ModRM( 3, to, from );
}
emitterT void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xF80F );
ModRM( 3, to, from );
}
emitterT void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xF90F );
ModRM( 3, to, from );
}
emitterT void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xFA0F );
ModRM( 3, to, from );
}
emitterT void PSUBDMtoR( x86MMXRegType to, uptr from )
{
write16( 0xFA0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xD80F );
ModRM( 3, to, from );
}
emitterT void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xD90F );
ModRM( 3, to, from );
}
// psubq m64 to r64 (SSE2)
emitterT void PSUBQMtoR( x86MMXRegType to, uptr from )
{
write16( 0xFB0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
// psubq r64 to r64 (SSE2)
emitterT void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xFB0F );
ModRM( 3, to, from );
}
// pmuludq m64 to r64 (SSE2)
emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0xF40F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
// pmuludq r64 to r64 (SSE2)
emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xF40F );
ModRM( 3, to, from );
}
emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x740F );
ModRM( 3, to, from );
}
emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x750F );
ModRM( 3, to, from );
}
emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x760F );
ModRM( 3, to, from );
}
emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from )
{
write16( 0x760F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x640F );
ModRM( 3, to, from );
}
emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x650F );
ModRM( 3, to, from );
}
emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x660F );
ModRM( 3, to, from );
}
emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from )
{
write16( 0x660F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PSRLWItoR( x86MMXRegType to, u8 from )
{
write16( 0x710F );
ModRM( 3, 2 , to );
write8( from );
}
emitterT void PSRLDItoR( x86MMXRegType to, u8 from )
{
write16( 0x720F );
ModRM( 3, 2 , to );
write8( from );
}
emitterT void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xD20F );
ModRM( 3, to, from );
}
emitterT void PSLLWItoR( x86MMXRegType to, u8 from )
{
write16( 0x710F );
ModRM( 3, 6 , to );
write8( from );
}
emitterT void PSLLDItoR( x86MMXRegType to, u8 from )
{
write16( 0x720F );
ModRM( 3, 6 , to );
write8( from );
}
emitterT void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xF20F );
ModRM( 3, to, from );
}
emitterT void PSRAWItoR( x86MMXRegType to, u8 from )
{
write16( 0x710F );
ModRM( 3, 4 , to );
write8( from );
}
emitterT void PSRADItoR( x86MMXRegType to, u8 from )
{
write16( 0x720F );
ModRM( 3, 4 , to );
write8( from );
}
emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0xE20F );
ModRM( 3, to, from );
}
/* por m64 to r64 */
emitterT void PORMtoR( x86MMXRegType to, uptr from )
{
write16( 0xEB0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* pxor m64 to r64 */
emitterT void PXORMtoR( x86MMXRegType to, uptr from )
{
write16( 0xEF0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* pand m64 to r64 */
emitterT void PANDMtoR( x86MMXRegType to, uptr from )
{
//u64 rip = (u64)x86Ptr + 7;
write16( 0xDB0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PANDNMtoR( x86MMXRegType to, uptr from )
{
write16( 0xDF0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x6A0F );
ModRM( 3, to, from );
}
emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0x6A0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x620F );
ModRM( 3, to, from );
}
emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from )
{
write16( 0x620F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
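// Loads a 64-bit immediate by embedding it in the code stream: the movq below
// is 7 bytes and the JMP8 is 2, so x86Ptr + 2 + 7 is the address of the constant
// that write64() places right after the jump (which skips over it).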
emitterT void MOVQ64ItoR( x86MMXRegType reg, u64 i )
{
MOVQMtoR( reg, ( uptr )(x86Ptr) + 2 + 7 );
JMP8( 8 );
write64( i );
}
emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16( 0x6F0F );
ModRM( 3, to, from );
}
emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset )
{
write16( 0x6F0F );
WriteRmOffsetFrom( to, from, offset );
}
emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
write16( 0x7F0F );
WriteRmOffsetFrom( from, to, offset );
}
/* movd m32 to r64 */
emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from )
{
write16( 0x6E0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* movd r64 to m32 */
emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from )
{
write16( 0x7E0F );
ModRM( 0, from, DISP32 );
write32( MEMADDR(to, 4) );
}
emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from )
{
write16( 0x6E0F );
ModRM( 3, to, from );
}
emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset )
{
write16( 0x6E0F );
WriteRmOffsetFrom( to, from, offset );
}
emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from )
{
write16( 0x7E0F );
ModRM( 3, from, to );
}
emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
write16( 0x7E0F );
WriteRmOffsetFrom( from, to, offset );
}
// untested
emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
{
write16( 0x630F );
ModRM( 3, to, from );
}
emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
{
write16( 0x6B0F );
ModRM( 3, to, from );
}
emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from)
{
write16( 0xD70F );
ModRM( 3, to, from );
}
emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
{
if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3);
write16( 0xc40f );
ModRM( 3, to, from );
write8( imm8 );
}
emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
write16(0x700f);
ModRM( 3, to, from );
write8(imm8);
}
emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
write16( 0x700f );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
write8(imm8);
}
emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from)
{
write16(0xf70f);
ModRM( 3, to, from );
}


@ -1,647 +0,0 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
//------------------------------------------------------------------
// MMX instructions
//
// note: r64 = mm
//------------------------------------------------------------------
/* movq m64 to r64 */
emitterT void eMOVQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0x6F0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* movq r64 to m64 */
emitterT void eMOVQRtoM( uptr to, x86MMXRegType from )
{
write16<I>( 0x7F0F );
ModRM<I>( 0, from, DISP32 );
write32<I>(MEMADDR(to, 4));
}
/* pand r64 to r64 */
emitterT void ePANDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xDB0F );
ModRM<I>( 3, to, from );
}
emitterT void ePANDNRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xDF0F );
ModRM<I>( 3, to, from );
}
/* por r64 to r64 */
emitterT void ePORRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xEB0F );
ModRM<I>( 3, to, from );
}
/* pxor r64 to r64 */
emitterT void ePXORRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xEF0F );
ModRM<I>( 3, to, from );
}
/* psllq r64 to r64 */
emitterT void ePSLLQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xF30F );
ModRM<I>( 3, to, from );
}
/* psllq m64 to r64 */
emitterT void ePSLLQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xF30F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* psllq imm8 to r64 */
emitterT void ePSLLQItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x730F );
ModRM<I>( 3, 6, to);
write8<I>( from );
}
/* psrlq r64 to r64 */
emitterT void ePSRLQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xD30F );
ModRM<I>( 3, to, from );
}
/* psrlq m64 to r64 */
emitterT void ePSRLQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xD30F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* psrlq imm8 to r64 */
emitterT void ePSRLQItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x730F );
ModRM<I>( 3, 2, to);
write8<I>( from );
}
/* paddusb r64 to r64 */
emitterT void ePADDUSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xDC0F );
ModRM<I>( 3, to, from );
}
/* paddusb m64 to r64 */
emitterT void ePADDUSBMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xDC0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* paddusw r64 to r64 */
emitterT void ePADDUSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xDD0F );
ModRM<I>( 3, to, from );
}
/* paddusw m64 to r64 */
emitterT void ePADDUSWMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xDD0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* paddb r64 to r64 */
emitterT void ePADDBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xFC0F );
ModRM<I>( 3, to, from );
}
/* paddb m64 to r64 */
emitterT void ePADDBMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xFC0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* paddw r64 to r64 */
emitterT void ePADDWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xFD0F );
ModRM<I>( 3, to, from );
}
/* paddw m64 to r64 */
emitterT void ePADDWMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xFD0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* paddd r64 to r64 */
emitterT void ePADDDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xFE0F );
ModRM<I>( 3, to, from );
}
/* paddd m64 to r64 */
emitterT void ePADDDMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xFE0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* emms */
emitterT void eEMMS()
{
write16<I>( 0x770F );
}
emitterT void ePADDSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xEC0F );
ModRM<I>( 3, to, from );
}
emitterT void ePADDSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xED0F );
ModRM<I>( 3, to, from );
}
// paddq m64 to r64 (sse2 only?)
emitterT void ePADDQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xD40F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
// paddq r64 to r64 (sse2 only?)
emitterT void ePADDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xD40F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xE80F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xE90F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xF80F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xF90F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xFA0F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBDMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xFA0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
emitterT void ePSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xD80F );
ModRM<I>( 3, to, from );
}
emitterT void ePSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xD90F );
ModRM<I>( 3, to, from );
}
// psubq m64 to r64 (sse2 only?)
emitterT void ePSUBQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xFB0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
// psubq r64 to r64 (sse2 only?)
emitterT void ePSUBQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xFB0F );
ModRM<I>( 3, to, from );
}
// pmuludq m64 to r64 (sse2 only?)
emitterT void ePMULUDQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xF40F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
// pmuludq r64 to r64 (sse2 only?)
emitterT void ePMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xF40F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x740F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x750F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x760F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPEQDMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0x760F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
emitterT void ePCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x640F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x650F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x660F );
ModRM<I>( 3, to, from );
}
emitterT void ePCMPGTDMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0x660F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
emitterT void ePSRLWItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x710F );
ModRM<I>( 3, 2 , to );
write8<I>( from );
}
emitterT void ePSRLDItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x720F );
ModRM<I>( 3, 2 , to );
write8<I>( from );
}
emitterT void ePSRLDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xD20F );
ModRM<I>( 3, to, from );
}
emitterT void ePSLLWItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x710F );
ModRM<I>( 3, 6 , to );
write8<I>( from );
}
emitterT void ePSLLDItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x720F );
ModRM<I>( 3, 6 , to );
write8<I>( from );
}
emitterT void ePSLLDRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xF20F );
ModRM<I>( 3, to, from );
}
emitterT void ePSRAWItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x710F );
ModRM<I>( 3, 4 , to );
write8<I>( from );
}
emitterT void ePSRADItoR( x86MMXRegType to, u8 from )
{
write16<I>( 0x720F );
ModRM<I>( 3, 4 , to );
write8<I>( from );
}
emitterT void ePSRADRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0xE20F );
ModRM<I>( 3, to, from );
}
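// Note on the immediate-shift forms above: the ModRM 'reg' field is an opcode
// extension rather than a register — /2 selects PSRL, /4 PSRA and /6 PSLL
// within the 0F 71 (word) and 0F 72 (dword) shift groups — while the target
// MMX register travels in the r/m field instead.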
/* por m64 to r64 */
emitterT void ePORMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xEB0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* pxor m64 to r64 */
emitterT void ePXORMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xEF0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* pand m64 to r64 */
emitterT void ePANDMtoR( x86MMXRegType to, uptr from )
{
//u64 rip = (u64)x86Ptr[0] + 7;
write16<I>( 0xDB0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
emitterT void ePANDNMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0xDF0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
emitterT void ePUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x6A0F );
ModRM<I>( 3, to, from );
}
emitterT void ePUNPCKHDQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0x6A0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
emitterT void ePUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x620F );
ModRM<I>( 3, to, from );
}
emitterT void ePUNPCKLDQMtoR( x86MMXRegType to, uptr from )
{
write16<I>( 0x620F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
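// eMOVQ64ItoR loads a 64-bit immediate by embedding it directly in the code
// stream: the MOVQ below encodes to 7 bytes and the JMP8 to 2, so x86Ptr + 2 + 7
// is exactly where write64 places the data, and the 8-byte jump skips over it.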
emitterT void eMOVQ64ItoR( x86MMXRegType reg, u64 i )
{
eMOVQMtoR<I>( reg, ( uptr )(x86Ptr[0]) + 2 + 7 );
eJMP8<I>( 8 );
write64<I>( i );
}
emitterT void eMOVQRtoR( x86MMXRegType to, x86MMXRegType from )
{
write16<I>( 0x6F0F );
ModRM<I>( 3, to, from );
}
emitterT void eMOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset )
{
write16<I>( 0x6F0F );
if( offset < 128 ) {
ModRM<I>( 1, to, from );
write8<I>(offset);
}
else {
ModRM<I>( 2, to, from );
write32<I>(offset);
}
}
emitterT void eMOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset )
{
write16<I>( 0x7F0F );
if( offset < 128 ) {
ModRM<I>( 1, from , to );
write8<I>(offset);
}
else {
ModRM<I>( 2, from, to );
write32<I>(offset);
}
}
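// The RmOffset helpers pick the ModRM addressing form from the displacement
// size: mod=01b takes a one-byte displacement, mod=10b a four-byte one. Since
// 'offset' is unsigned here, only values 0..127 get the short encoding.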
/* movd m32 to r64 */
emitterT void eMOVDMtoMMX( x86MMXRegType to, uptr from )
{
write16<I>( 0x6E0F );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
}
/* movd r64 to m32 */
emitterT void eMOVDMMXtoM( uptr to, x86MMXRegType from )
{
write16<I>( 0x7E0F );
ModRM<I>( 0, from, DISP32 );
write32<I>( MEMADDR(to, 4) );
}
emitterT void eMOVD32RtoMMX( x86MMXRegType to, x86IntRegType from )
{
write16<I>( 0x6E0F );
ModRM<I>( 3, to, from );
}
emitterT void eMOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from )
{
write16<I>( 0x6E0F );
ModRM<I>( 0, to, from );
}
emitterT void eMOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset )
{
write16<I>( 0x6E0F );
if( offset < 128 ) {
ModRM<I>( 1, to, from );
write8<I>(offset);
}
else {
ModRM<I>( 2, to, from );
write32<I>(offset);
}
}
emitterT void eMOVD32MMXtoR( x86IntRegType to, x86MMXRegType from )
{
write16<I>( 0x7E0F );
ModRM<I>( 3, from, to );
}
emitterT void eMOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from )
{
write16<I>( 0x7E0F );
ModRM<I>( 0, from, to );
if( to >= 4 ) {
// rm == ESP (100b) selects SIB addressing, so the SIB byte 0x24 ([esp], no index) must follow
assert( to == ESP );
write8<I>(0x24);
}
}
emitterT void eMOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset )
{
write16<I>( 0x7E0F );
if( offset < 128 ) {
ModRM<I>( 1, from, to );
write8<I>(offset);
}
else {
ModRM<I>( 2, from, to );
write32<I>(offset);
}
}
///* movd r32 to r64 */
//emitterT void eMOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from )
//{
// write16<I>( 0x6E0F );
// ModRM<I>( 3, to, from );
//}
//
///* movq r64 to r32 */
//emitterT void eMOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from )
//{
// write16<I>( 0x7E0F );
// ModRM<I>( 3, from, to );
//}
// untested
emitterT void ePACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
{
write16<I>( 0x630F );
ModRM<I>( 3, to, from );
}
emitterT void ePACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
{
write16<I>( 0x6B0F );
ModRM<I>( 3, to, from );
}
emitterT void ePMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from)
{
write16<I>( 0xD70F );
ModRM<I>( 3, to, from );
}
emitterT void ePINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
{
if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3);
write16<I>( 0xc40f );
ModRM<I>( 3, to, from );
write8<I>( imm8 );
}
emitterT void ePSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
{
write16<I>(0x700f);
ModRM<I>( 3, to, from );
write8<I>(imm8);
}
emitterT void ePSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
{
write16<I>( 0x700f );
ModRM<I>( 0, to, DISP32 );
write32<I>( MEMADDR(from, 4) );
write8<I>(imm8);
}
emitterT void eMASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from)
{
write16<I>(0xf70f);
ModRM<I>( 3, to, from );
}

pcsx2/x86/ix86/ix86_sse.cpp (new file, 1561 lines)

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -22,164 +22,30 @@
// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions)
// This header should always be included *after* ix86.h.
#ifndef _ix86_included_
#error Dependency fail: Please define _EmitterId_ and include ix86.h first.
#endif
// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the
// overhead of dynarec instructions that use these.
static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
else SSE_MOVAPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
else SSE_MOVAPS_XMM_to_M128(to, from);
}
static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
else SSE_MOVAPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
else SSE_MOVAPSRmtoROffset(to, from, offset);
}
static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
else SSE_MOVAPSRtoRmOffset(to, from, offset);
}
static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
else SSE_MOVUPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
else SSE_MOVUPS_XMM_to_M128(to, from);
}
static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
else SSE_MOVSS_M32_to_XMM(to, from);
}
static __forceinline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
else SSE_MOVSS_XMM_to_M32(to, from);
}
static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
else SSE_MOVSS_XMM_to_Rm(to, from);
}
static __forceinline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
}
static __forceinline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
}
static __forceinline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
else SSE_ORPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
else SSE_ORPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
else SSE_XORPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
else SSE_XORPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
else SSE_ANDPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
else SSE_ANDPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
else SSE_ANDNPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
else SSE_ANDNPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
else SSE_UNPCKLPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
else SSE_UNPCKLPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
else SSE_UNPCKHPS_M128_to_XMM(to, from);
}
static __forceinline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
else SSE_UNPCKHPS_XMM_to_XMM(to, from);
}
static __forceinline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
if( g_xmmtypes[from] == XMMT_INT ) {
SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
}
else {
SSE_MOVHLPS_XMM_to_XMM(to, from);
}
}
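// (A note on the INT path of SSEX_MOVHLPS above, since the trick is easy to
// misread: PUNPCKHQDQ(to, from) leaves [to.hi, from.hi] in 'to', and the
// PSHUFD with 0x4e swaps the two qwords to [from.hi, to.hi] — the MOVHLPS
// result, low qword taken from the source's high half with the destination's
// high half preserved. When to == from the unpack alone already yields the
// right value, hence the 'to != from' guard.)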
extern void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from );
extern void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 );
extern void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 );
extern void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from );
extern void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from );
extern void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 );
extern void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 );
extern void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from );
extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from );
extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
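// A minimal usage sketch (an assumption about caller conventions, not code from
// this commit): the SSEX wrappers consult g_xmmtypes[] so that a register the
// allocator marked as integer data travels through the SSE2 integer domain
// (movdqa/por) while float data keeps the float forms (movaps/orps), avoiding
// cross-domain bypass penalties.
static void SSEX_Example( x86SSERegType dst, x86SSERegType src )
{
	g_xmmtypes[src] = XMMT_INT;          // pretend the allocator tracked integer data here
	SSEX_MOVDQA_XMM_to_XMM( dst, src );  // emits movdqa rather than movaps
	SSEX_POR_XMM_to_XMM( dst, src );     // emits por rather than orps
}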

View File

@ -18,7 +18,7 @@
#include "PrecompiledHeader.h"
#include "Misc.h"
#include "System.h"
#include "ix86/ix86.h"
// used to make sure regs don't get changed while in recompiler
@ -27,8 +27,8 @@
u8 g_globalMMXSaved = 0;
u8 g_globalXMMSaved = 0;
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
PCSX2_ALIGNED16( u64 g_globalMMXData[8] );
PCSX2_ALIGNED16( u64 g_globalXMMData[2*XMMREGS] );
/////////////////////////////////////////////////////////////////////

View File

@ -29,6 +29,7 @@
// general types
typedef int x86IntRegType;
#define EAX 0
#define EBX 3
#define ECX 1
@ -149,3 +150,252 @@ struct CPUINFO{
extern CPUINFO cpuinfo;
//------------------------------------------------------------------
// templated version of is_s8 is required, so that u16's get correct sign extension treatment.
template< typename T >
static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; }
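// Worked example of the sign-extension point (illustrative): with T = u16,
// is_s8( (u16)0xFFFF ) compares (s8)0xFFFF == -1 against (s32)0xFFFF == 65535
// and returns false, while with T = s16 the same bit pattern is -1 on both
// sides of the compare and returns true; a single fixed-parameter overload
// would silently force one of those conversions on the caller.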
namespace x86Emitter
{
class x86ModRm;
//////////////////////////////////////////////////////////////////////////////////////////
//
struct x86Register32
{
static const x86Register32 Empty; // defined as an empty/unused value (-1)
int Id;
x86Register32( const x86Register32& src ) : Id( src.Id ) {}
x86Register32() : Id( -1 ) {}
explicit x86Register32( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id == -1; }
bool operator==( const x86Register32& src ) const { return Id == src.Id; }
bool operator!=( const x86Register32& src ) const { return Id != src.Id; }
x86ModRm operator+( const x86Register32& right ) const;
x86ModRm operator+( const x86ModRm& right ) const;
x86ModRm operator+( s32 right ) const;
x86ModRm operator*( u32 factor ) const;
x86Register32& operator=( const x86Register32& src )
{
Id = src.Id;
return *this;
}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Similar to x86Register, but without the ability to add/combine them with ModSib.
//
class x86Register16
{
public:
static const x86Register16 Empty;
int Id;
x86Register16( const x86Register16& src ) : Id( src.Id ) {}
x86Register16() : Id( -1 ) {}
explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id == -1; }
bool operator==( const x86Register16& src ) const { return Id == src.Id; }
bool operator!=( const x86Register16& src ) const { return Id != src.Id; }
x86Register16& operator=( const x86Register16& src )
{
Id = src.Id;
return *this;
}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Similar to x86Register, but without the ability to add/combine them with ModSib.
//
class x86Register8
{
public:
static const x86Register8 Empty;
int Id;
x86Register8( const x86Register8& src ) : Id( src.Id ) {}
x86Register8() : Id( -1 ) {}
explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id == -1; }
bool operator==( const x86Register8& src ) const { return Id == src.Id; }
bool operator!=( const x86Register8& src ) const { return Id != src.Id; }
x86Register8& operator=( const x86Register8& src )
{
Id = src.Id;
return *this;
}
};
// Use 32 bit registers as our index register (for ModSib memory address calculations)
typedef x86Register32 x86IndexReg;
//////////////////////////////////////////////////////////////////////////////////////////
//
class x86ModRm
{
public:
x86IndexReg Base; // base register (no scale)
x86IndexReg Index; // index reg gets multiplied by the scale
int Factor; // scale applied to the index register, in factor form (not a shift!)
s32 Displacement; // address displacement
public:
x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) :
Base( base ),
Index( index ),
Factor( factor ),
Displacement( displacement )
{
}
explicit x86ModRm( x86IndexReg base, int displacement=0 ) :
Base( base ),
Index(),
Factor(0),
Displacement( displacement )
{
}
explicit x86ModRm( s32 displacement ) :
Base(),
Index(),
Factor(0),
Displacement( displacement )
{
}
static x86ModRm FromIndexReg( x86IndexReg index, int scale=0, s32 displacement=0 );
public:
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
x86IndexReg GetEitherReg() const;
x86ModRm& Add( s32 imm )
{
Displacement += imm;
return *this;
}
x86ModRm& Add( const x86IndexReg& src );
x86ModRm& Add( const x86ModRm& src );
x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); }
x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); }
x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); }
x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); }
};
//////////////////////////////////////////////////////////////////////////////////////////
// ModSib - Internal low-level representation of the ModRM/SIB information.
//
// This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that
// the Base, Index, Scale, and Displacement values are all valid, and it serves as a type-
// safe layer between the x86Register's operators (which generate x86ModRm types) and the
// emitter's ModSib instruction forms. Without this, the x86Register would pass as a
// ModSib type implicitly, and that would cause ambiguity on a number of instructions.
//
class ModSib
{
public:
x86IndexReg Base; // base register (no scale)
x86IndexReg Index; // index reg gets multiplied by the scale
int Scale; // scale applied to the index register, in scale/shift form
s32 Displacement; // offset applied to the Base/Index registers.
explicit ModSib( const x86ModRm& src );
explicit ModSib( s32 disp );
ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 );
x86IndexReg GetEitherReg() const;
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
ModSib& Add( s32 imm )
{
Displacement += imm;
return *this;
}
ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); }
ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); }
protected:
void Reduce();
};
//////////////////////////////////////////////////////////////////////////////////////////
// x86IndexerType - This is a static class which provisions our ptr[] syntax.
//
struct x86IndexerType
{
// passthrough instruction, allows ModSib to pass silently through ptr translation
// without doing anything and without compiler error.
const ModSib& operator[]( const ModSib& src ) const { return src; }
ModSib operator[]( x86IndexReg src ) const
{
return ModSib( src, x86IndexReg::Empty );
}
ModSib operator[]( const x86ModRm& src ) const
{
return ModSib( src );
}
ModSib operator[]( uptr src ) const
{
return ModSib( src );
}
ModSib operator[]( void* src ) const
{
return ModSib( (uptr)src );
}
x86IndexerType() {}
};
// ------------------------------------------------------------------------
extern const x86IndexerType ptr;
extern const x86Register32 eax;
extern const x86Register32 ebx;
extern const x86Register32 ecx;
extern const x86Register32 edx;
extern const x86Register32 esi;
extern const x86Register32 edi;
extern const x86Register32 ebp;
extern const x86Register32 esp;
extern const x86Register16 ax;
extern const x86Register16 bx;
extern const x86Register16 cx;
extern const x86Register16 dx;
extern const x86Register16 si;
extern const x86Register16 di;
extern const x86Register16 bp;
extern const x86Register16 sp;
extern const x86Register8 al;
extern const x86Register8 cl;
extern const x86Register8 dl;
extern const x86Register8 bl;
extern const x86Register8 ah;
extern const x86Register8 ch;
extern const x86Register8 dh;
extern const x86Register8 bh;
}
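// A short usage sketch of the ptr[] syntax above (illustrative only; the
// 'ModSibExample' helper is not part of the API, and Scale is stored in
// shift form, so ecx*4 reduces to Scale == 2):
static void ModSibExample()
{
	using namespace x86Emitter;
	ModSib m = ptr[ebx + ecx*4 + 0x20]; // Base=ebx, Index=ecx, Scale=2, Displacement=0x20
	ModSib d = ptr[(void*)0x1000];      // displacement-only (absolute address) form
}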

View File

@ -41,8 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
mVU->regs = vuRegsPtr;
mVU->index = vuIndex;
mVU->microSize = (vuIndex ? 0x4000 : 0x1000);
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8;
mVU->cacheAddr = 0xC0000000 + (vuIndex ? mVU->cacheSize : 0);
mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4;
mVU->cache = NULL;
mVUreset<vuIndex>();
@ -55,22 +54,36 @@ microVUt(void) mVUreset() {
mVUclose<vuIndex>(); // Close
// Create Block Managers
for (int i; i <= mVU->prog.max; i++) {
for (u32 j; j < (mVU->progSize / 2); j++) {
for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}
// Dynarec Cache
mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0x10000000, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache));
mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache));
mVU->ptr = mVU->cache;
// Other Variables
// Setup Entrance/Exit Points
mVUdispatcherA<vuIndex>();
mVUdispatcherB<vuIndex>();
// Program Variables
memset(&mVU->prog, 0, sizeof(mVU->prog));
mVU->prog.finished = 1;
mVU->prog.cleared = 1;
mVU->prog.cur = -1;
mVU->prog.total = -1;
// Setup Dynarec Cache Limits for Each Program
u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes
for (int i = 0; i <= mVU->prog.max; i++) {
mVU->prog.prog[i].x86start = z;
mVU->prog.prog[i].x86ptr = z;
z += ((mVU->cacheSize - 512) / (mVU->prog.max + 1)); // split the area after the 512-byte dispatcher block, so the last program's x86end stays inside the mapped cache
mVU->prog.prog[i].x86end = z;
}
}
// Free Allocated Resources
@ -81,8 +94,8 @@ microVUt(void) mVUclose() {
if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; }
// Delete Block Managers
for (int i; i <= mVU->prog.max; i++) {
for (u32 j; j < (mVU->progSize / 2); j++) {
for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j];
}
}
@ -99,33 +112,6 @@ microVUt(void) mVUclear(u32 addr, u32 size) {
// that it's probably not worth it...
}
// Executes for number of cycles
microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
/*
Pseudocode: (ToDo: implement # of cycles)
1) Search for existing program
2) If program not found, goto 5
3) Search for recompiled block
4) If recompiled block found, goto 6
5) Recompile as much blocks as possible
6) Return start execution address of block
*/
microVU* mVU = mVUx;
if ( mVUsearchProg(mVU) ) { // Found Program
//microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState);
//if (block) return block->x86ptrStart; // Found Block
}
// Recompile code
return NULL;
}
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) {
return mVUexecute<0>(startPC, cycles);
}
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) {
return mVUexecute<1>(startPC, cycles);
}
//------------------------------------------------------------------
// Micro VU - Private Functions
//------------------------------------------------------------------
@ -133,6 +119,7 @@ void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) {
// Clears program data (Sets used to 1 because calling this function implies the program will be used at least once)
__forceinline void mVUclearProg(microVU* mVU, int progIndex) {
mVU->prog.prog[progIndex].used = 1;
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
mVU->prog.prog[progIndex].block[i]->reset();
}
@ -171,7 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) {
for (int i = 0; i <= mVU->prog.total; i++) {
//if (i == mVU->prog.cur) continue; // We can skip the current program. (ToDo: Verify that games don't clear, and send the same microprogram :/)
if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
if (i == mVU->prog.cur) SysPrintf("microVU: Same micro program sent!\n");
if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); }
mVU->prog.cur = i;
mVU->prog.cleared = 0;
mVU->prog.prog[i].used++;
@ -206,98 +193,31 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) {
}
}
//------------------------------------------------------------------
// Dispatcher Functions
//------------------------------------------------------------------
#ifdef _MSC_VER
// Runs VU0 for number of cycles
__declspec(naked) void __fastcall startVU0(u32 startPC, u32 cycles) {
__asm {
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
call mVUexecuteVU0
/*backup cpu state*/
push ebx;
push ebp;
push esi;
push edi;
ldmxcsr g_sseVUMXCSR
/* Should set xmmZ? */
jmp eax
}
}
// Runs VU1 for number of cycles
__declspec(naked) void __fastcall startVU1(u32 startPC, u32 cycles) {
__asm {
call mVUexecuteVU1
/*backup cpu state*/
push ebx;
push ebp;
push esi;
push edi;
ldmxcsr g_sseVUMXCSR
jmp eax
}
}
// Exit point
__declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) {
__asm {
//call mVUcleanUpVU0
/*restore cpu state*/
pop edi;
pop esi;
pop ebp;
pop ebx;
ldmxcsr g_sseMXCSR
emms
ret
}
}
#else
extern "C" {
extern void __fastcall startVU0(u32 startPC, u32 cycles);
extern void __fastcall startVU1(u32 startPC, u32 cycles);
extern void __fastcall endVU0(u32 startPC, u32 cycles);
}
#endif
//------------------------------------------------------------------
// Wrapper Functions - Called by other parts of the Emu
//------------------------------------------------------------------
__forceinline void initVUrec(VURegs* vuRegs, const int vuIndex) {
void initVUrec(VURegs* vuRegs, const int vuIndex) {
if (!vuIndex) mVUinit<0>(vuRegs);
else mVUinit<1>(vuRegs);
}
__forceinline void closeVUrec(const int vuIndex) {
void closeVUrec(const int vuIndex) {
if (!vuIndex) mVUclose<0>();
else mVUclose<1>();
}
__forceinline void resetVUrec(const int vuIndex) {
void resetVUrec(const int vuIndex) {
if (!vuIndex) mVUreset<0>();
else mVUreset<1>();
}
__forceinline void clearVUrec(u32 addr, u32 size, const int vuIndex) {
void clearVUrec(u32 addr, u32 size, const int vuIndex) {
if (!vuIndex) mVUclear<0>(addr, size);
else mVUclear<1>(addr, size);
}
__forceinline void runVUrec(u32 startPC, u32 cycles, const int vuIndex) {
void runVUrec(u32 startPC, u32 cycles, const int vuIndex) {
if (!vuIndex) startVU0(startPC, cycles);
else startVU1(startPC, cycles);
}

View File

@ -18,7 +18,6 @@
#pragma once
#define mVUdebug // Prints Extra Info to Console
#define _EmitterId_ (vuIndex+1)
#include "Common.h"
#include "VU.h"
#include "GS.h"
@ -92,9 +91,12 @@ public:
template<u32 progSize>
struct microProgram {
u32 data[progSize];
u32 data[progSize/4];
u32 used; // Number of times its been used
microBlockManager* block[progSize / 2];
u8* x86ptr; // Pointer to program's recompilation code
u8* x86start; // Start of program's rec-cache
u8* x86end; // Limit of program's rec-cache
microBlockManager* block[progSize/8];
microAllocInfo<progSize> allocInfo;
};
@ -113,30 +115,24 @@ struct microProgManager {
struct microVU {
u32 index; // VU Index (VU0 or VU1)
u32 microSize; // VU Micro Memory Size
u32 progSize; // VU Micro Program Size (microSize/8)
u32 cacheAddr; // VU Cache Start Address
u32 progSize; // VU Micro Program Size (microSize/4)
static const u32 cacheSize = 0x500000; // VU Cache Size
microProgManager<0x1000> prog; // Micro Program Data
microProgManager<0x4000> prog; // Micro Program Data
VURegs* regs; // VU Regs Struct
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
u8* startFunct; // Ptr Function to the Start code for recompiled programs
u8* exitFunct; // Ptr Function to the Exit code for recompiled programs
u8* ptr; // Pointer to next place to write recompiled code to
u32 code; // Contains the current Instruction
u32 iReg; // iReg (only used in recompilation, not execution)
u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
u32 divFlag; // 1 instance of I/D flags
/*
uptr x86eax; // Accumulator register. Used in arithmetic operations.
uptr x86ecx; // Counter register. Used in shift/rotate instructions.
uptr x86edx; // Data register. Used in arithmetic operations and I/O operations.
uptr x86ebx; // Base register. Used as a pointer to data (located in DS in segmented mode).
uptr x86esp; // Stack Pointer register. Pointer to the top of the stack.
uptr x86ebp; // Stack Base Pointer register. Used to point to the base of the stack.
uptr x86esi; // Source register. Used as a pointer to a source in stream operations.
uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations.
*/
u32 VIbackup[2]; // Holds a backup of a VI reg if modified before a branch
u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR)
u32 p; // Holds current P instance index
u32 q; // Holds current Q instance index
};
// microVU rec structs
@ -147,14 +143,24 @@ extern PCSX2_ALIGNED16(microVU microVU1);
extern void (*mVU_UPPER_OPCODE[64])( VURegs* VU, s32 info );
extern void (*mVU_LOWER_OPCODE[128])( VURegs* VU, s32 info );
// Main Functions
microVUt(void) mVUinit(VURegs*);
microVUt(void) mVUreset();
microVUt(void) mVUclose();
microVUt(void) mVUclear(u32, u32);
// Private Functions
__forceinline void mVUclearProg(microVU* mVU, int progIndex);
__forceinline int mVUfindLeastUsedProg(microVU* mVU);
__forceinline int mVUsearchProg(microVU* mVU);
__forceinline void mVUcacheProg(microVU* mVU, int progIndex);
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles);
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles);
#ifdef __LINUX__
microVUt(void) mVUreset();
microVUt(void) mVUclose();
#ifndef __LINUX__
typedef void (__fastcall *mVUrecCall)(u32, u32);
#else
typedef void (__attribute__((__fastcall__)) *mVUrecCall)(u32, u32); // gcc wants the calling-convention attribute inside the declarator
#endif
// Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files)
@ -163,3 +169,4 @@ microVUt(void) mVUclose();
#include "microVU_Alloc.inl"
#include "microVU_Tables.inl"
#include "microVU_Compile.inl"
#include "microVU_Execute.inl"

View File

@ -33,6 +33,8 @@ struct microRegInfo {
u8 VI[32];
u8 q;
u8 p;
u8 r;
u8 xgkick;
};
struct microTempRegInfo {
@ -42,40 +44,19 @@ struct microTempRegInfo {
u8 VIreg; // Index of the VI reg
u8 q; // Holds cycle info for Q reg
u8 p; // Holds cycle info for P reg
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
u8 xgkick; // Holds the cycle info for XGkick
};
template<u32 pSize>
struct microAllocInfo {
microRegInfo regs; // Pipeline info
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR)
u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag
u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time.
u8 branch; // 0 = No Branch, 1 = B, 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
u32 cycles; // Cycles for current block
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
u32 curPC; // Current PC
u32 info[pSize]; // bit 00 = Lower Instruction is NOP
// bit 01
// bit 02
// bit 03
// bit 04
// bit 05 = Write to Q1 or Q2?
// bit 06 = Read Q1 or Q2?
// bit 07 = Read/Write to P1 or P2?
// bit 08 = Update Mac Flags?
// bit 09 = Update Status Flags?
// bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance
// bit 11
// bit 12 = Used with bit 13 to make a 2-bit key for status flag instance
// bit 13
// bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance
// bit 15
// bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance
// bit 17
// bit 18 = Used with bit 19 to make a 2-bit key for status flag instance
// bit 19
// bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance
// bit 21
// bit 22 = Read VI(Fs) from backup memory?
// bit 23 = Read VI(Ft) from backup memory?
u32 startPC; // Start PC for Cur Block
u32 info[pSize/8]; // Info for Instructions in current block
};

View File

@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) {
// FMAC6 - Normal FMAC Opcodes (I Reg)
//------------------------------------------------------------------
#define getIreg(reg) { \
#define getIreg(reg, modXYZW) { \
MOV32ItoR(gprT1, mVU->iReg); \
SSE2_MOVD_R_to_XMM(reg, gprT1); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 8); \
if (!_XYZW_SS) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw<vuIndex>(reg, reg, 0); } \
}
microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) {
Fs = xmmFs;
Ft = xmmFt;
Fd = xmmFs;
getIreg(Ft);
getIreg(Ft, 1);
getReg6(Fs, _Fs_);
}
@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) {
ACC = xmmACC;
Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs;
Ft = xmmFt;
getIreg(Ft);
getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) {
Ft = xmmFt;
Fd = xmmFs;
ACC = xmmACC;
getIreg(Ft);
getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) {
Fd = xmmT1;
ACC = xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC);
getIreg(Ft);
getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
ACCw = xmmACC;
ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1;
SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC);
getIreg(Ft);
getIreg(Ft, 0);
if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); }
else if (!_Fs_) { getZero4(Fs); }
else { getReg4(Fs, _Fs_); }
@ -708,19 +708,7 @@ microVUt(void) mVUallocCFLAGb(int reg, int fInstance) {
microVU* mVU = mVUx;
MOV32RtoM(mVU->clipFlag[fInstance], reg);
}
/*
microVUt(void) mVUallocDFLAGa(int reg) {
microVU* mVU = mVUx;
//if (!mVUdivFlag) { MOV32MtoR(reg, (uptr)&mVU->divFlag[readQ]); AND32ItoR(reg, 0xc00); }
//else if (mVUdivFlag & 1) { XOR32RtoR(reg, reg); }
//else { MOV32ItoR(reg, (u32)((mVUdivFlag << 9) & 0xc00)); }
}
microVUt(void) mVUallocDFLAGb(int reg) {
microVU* mVU = mVUx;
//MOV32RtoM((uptr)&mVU->divFlag[writeQ], reg);
}
*/
//------------------------------------------------------------------
// VI Reg Allocators
//------------------------------------------------------------------
@ -734,6 +722,12 @@ microVUt(void) mVUallocVIa(int GPRreg, int _reg_) {
microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
microVU* mVU = mVUx;
if (backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch)
MOV32RtoM((uptr)&mVU->VIbackup[1], GPRreg);
mVUallocVIa<vuIndex>(GPRreg, _reg_);
MOV32RtoM((uptr)&mVU->VIbackup[0], GPRreg);
MOV32MtoR(GPRreg, (uptr)&mVU->VIbackup[1]);
}
if (_reg_ == 0) { return; }
else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); }
else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); }
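// (The backupVI sequence above stashes the caller's new value in VIbackup[1],
// reloads the VI reg's current pre-write value through mVUallocVIa so the
// pending branch can compare against it from VIbackup[0], then restores the
// new value; see the analyzeBranchVI/_backupVI logic in the analysis pass.)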

View File

@ -102,6 +102,49 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) {
analyzeReg4(Ft);
}
//------------------------------------------------------------------
// IALU - IALU Opcodes
//------------------------------------------------------------------
#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } }
#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } }
microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) {
microVU* mVU = mVUx;
if (!Id) { mVUinfo |= _isNOP; }
analyzeVIreg1(Is);
analyzeVIreg1(It);
analyzeVIreg2(Id, 1);
}
microVUt(void) mVUanalyzeIALU2(int Is, int It) {
microVU* mVU = mVUx;
if (!It) { mVUinfo |= _isNOP; }
analyzeVIreg1(Is);
analyzeVIreg2(It, 1);
}
//------------------------------------------------------------------
// MR32 - MR32 Opcode
//------------------------------------------------------------------
// Flips xyzw stalls to yzwx
#define analyzeReg6(reg) { \
if (reg) { \
if (_X) { mVUstall = aMax(mVUstall, aReg(reg).y); } \
if (_Y) { mVUstall = aMax(mVUstall, aReg(reg).z); } \
if (_Z) { mVUstall = aMax(mVUstall, aReg(reg).w); } \
if (_W) { mVUstall = aMax(mVUstall, aReg(reg).x); } \
} \
}
microVUt(void) mVUanalyzeMR32(int Fs, int Ft) {
microVU* mVU = mVUx;
if (!Ft) { mVUinfo |= _isNOP; }
analyzeReg6(Fs);
analyzeReg2(Ft);
}
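// (MR32 writes Ft.x = Fs.y, Ft.y = Fs.z, Ft.z = Fs.w, Ft.w = Fs.x, so the
// stall lookup above consults the source's lanes rotated one step — hence
// "flips xyzw stalls to yzwx".)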
//------------------------------------------------------------------
// FDIV - DIV/SQRT/RSQRT Opcodes
//------------------------------------------------------------------
@ -143,4 +186,120 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) {
analyzePreg(xCycles);
}
//------------------------------------------------------------------
// MFP - MFP Opcode
//------------------------------------------------------------------
microVUt(void) mVUanalyzeMFP(int Ft) {
microVU* mVU = mVUx; // ToDo: Needs special info for P reg?
if (!Ft) { mVUinfo |= _isNOP; }
analyzeReg2(Ft);
}
//------------------------------------------------------------------
// LQx - LQ/LQD/LQI Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) {
microVU* mVU = mVUx;
analyzeVIreg1(Is);
analyzeReg2(Ft);
if (!Ft) { mVUinfo |= (writeIs && Is) ? _noWriteVF : _isNOP; }
if (writeIs) { analyzeVIreg2(Is, 1); }
}
//------------------------------------------------------------------
// SQx - SQ/SQD/SQI Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) {
microVU* mVU = mVUx;
analyzeReg1(Fs);
analyzeVIreg1(It);
if (writeIt) { analyzeVIreg2(It, 1); }
}
//------------------------------------------------------------------
// R*** - R Reg Opcodes
//------------------------------------------------------------------
#define analyzeRreg() { mVUregsTemp.r = 1; }
microVUt(void) mVUanalyzeR1(int Fs, int Fsf) {
microVU* mVU = mVUx;
analyzeReg5(Fs, Fsf);
analyzeRreg();
}
microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
microVU* mVU = mVUx;
if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); }
analyzeReg2(Ft);
analyzeRreg();
}
//------------------------------------------------------------------
// Sflag - Status Flag Opcodes
//------------------------------------------------------------------
microVUt(void) mVUanalyzeSflag(int It) {
microVU* mVU = mVUx;
if (!It) { mVUinfo |= _isNOP; }
else { mVUinfo |= _isSflag | _swapOps; } // ToDo: set s flag at right time
analyzeVIreg2(It, 1);
}
microVUt(void) mVUanalyzeFSSET() {
microVU* mVU = mVUx;
int i, curPC = iPC;
for (i = mVUcount; i > 0; i--) {
incPC2(-2);
if (isSflag) break;
mVUinfo &= ~_doStatus;
}
iPC = curPC;
}
//------------------------------------------------------------------
// XGkick
//------------------------------------------------------------------
#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); }
#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; }
microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) {
microVU* mVU = mVUx;
analyzeVIreg1(Fs);
analyzeXGkick1();
analyzeXGkick2(xCycles);
}
//------------------------------------------------------------------
// Branches - Branch Opcodes
//------------------------------------------------------------------
#define analyzeBranchVI(reg, infoVal) { \
if (reg && (mVUcount > 0)) { /* Ensures branch is not first opcode in block */ \
incPC(-2); \
if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \
mVUinfo |= _backupVI; \
incPC(2); \
mVUinfo |= infoVal; \
} \
else { incPC(2); } \
} \
}
microVUt(void) mVUanalyzeBranch1(int Is) {
microVU* mVU = mVUx;
if (mVUregs.VI[Is]) { analyzeVIreg1(Is); }
else { analyzeBranchVI(Is, _memReadIs); }
}
microVUt(void) mVUanalyzeBranch2(int Is, int It) {
microVU* mVU = mVUx;
if (mVUregs.VI[Is] || mVUregs.VI[It]) { analyzeVIreg1(Is); analyzeVIreg1(It); }
else { analyzeBranchVI(Is, _memReadIs); analyzeBranchVI(It, _memReadIt);}
}
#endif //PCSX2_MICROVU

View File

@ -19,18 +19,6 @@
#pragma once
#ifdef PCSX2_MICROVU
#ifdef mVUdebug
#define mVUdebugStuff1() { \
if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \
if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \
if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \
if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \
if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \
}
#else
#define mVUdebugStuff1() {}
#endif
#define createBlock(blockEndPtr) { \
block.pipelineState = pipelineState; \
block.x86ptrStart = x86ptrStart; \
@ -41,37 +29,89 @@
} \
}
#define curI mVUcurProg.data[iPC]
#define setCode() { mVU->code = curI; }
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
#define branchCase(Xcmp) \
CMP16ItoM((uptr)&mVU->branch, 0); \
ajmp = Xcmp((uptr)0); \
break
#define branchCase2() { \
incPC(-2); \
MOV32ItoR(gprT1, (xPC + (2 * 8)) & ((vuIndex) ? 0x3fff:0xfff)); \
mVUallocVIb<vuIndex>(gprT1, _Ft_); \
incPC(+2); \
}
#define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); }
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
#define incP() { mVU->p = (mVU->p+1) & 1; }
#define incQ() { mVU->q = (mVU->q+1) & 1; }
microVUt(void) mVUincCycles(int x) {
microVU* mVU = mVUx;
mVUcycles += x;
for (int z = 31; z > 0; z--) {
calcCycles(mVUregs.VF[z].x, x);
calcCycles(mVUregs.VF[z].y, x);
calcCycles(mVUregs.VF[z].z, x);
calcCycles(mVUregs.VF[z].w, x);
}
for (int z = 16; z > 0; z--) {
calcCycles(mVUregs.VI[z], x);
}
if (mVUregs.q) {
calcCycles(mVUregs.q, x);
if (!mVUregs.q) { incQ(); } // Do Status Flag Merging Stuff?
}
if (mVUregs.p) {
calcCycles(mVUregs.p, x);
if (!mVUregs.p) { incP(); }
}
calcCycles(mVUregs.r, x);
calcCycles(mVUregs.xgkick, x);
}
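// Worked example (illustrative): calcCycles is a saturating subtract, so a
// result that becomes ready in 3 cycles (e.g. mVUregs.VF[n].x == 3) drops to
// 1 after mVUincCycles(2) and pins at 0 on any later advance; a counter of 0
// is what the analyzeReg stall checks treat as "no stall needed".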
microVUt(void) mVUsetCycles() {
microVU* mVU = mVUx;
incCycles(mVUstall);
if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg
mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP
mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector
mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y);
mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z);
mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w);
}
mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg;
mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? (aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg);
mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg;
mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI;
mVUregs.q = mVUregsTemp.q;
mVUregs.p = mVUregsTemp.p;
mVUregs.r = mVUregsTemp.r;
mVUregs.xgkick = mVUregsTemp.xgkick;
}
microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
//------------------------------------------------------------------
// Recompiler
//------------------------------------------------------------------
microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) {
microVU* mVU = mVUx;
microBlock block;
int branch;
u8* thisPtr = mVUcurProg.x86ptr;
iPC = startPC / 4;
// Searches for Existing Compiled Block (if found, then returns; else, compile)
microBlock* pblock = mVUblock[iPC]->search(pipelineState, pState);
if (block) { x86SetPtr(pblock->x86ptrEnd); return; }
microBlock* pblock = mVUblock[iPC/2]->search(pipelineState, pState);
if (pblock) { return pblock->x86ptrStart; }
// First Pass
setCode();
branch = 0;
mVUbranch = 0;
mVUstartPC = iPC;
mVUcount = 0;
mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage
for (;;) {
mVU->p = 0; // All blocks start at p index #0
mVU->q = 0; // All blocks start at q index #0
for (int branch = 0;; ) {
startLoop();
mVUopU<vuIndex, 0>();
if (curI & _Ebit_) { branch = 1; }
@ -79,34 +119,70 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
if (curI & _Ibit_) { incPC(1); mVUinfo |= _isNOP; }
else { incPC(1); mVUopL<vuIndex, 0>(); }
mVUsetCycles<vuIndex>();
if (mVU->p) { mVUinfo |= _readP; }
if (mVU->q) { mVUinfo |= _readQ; }
else { mVUinfo |= _writeQ; }
if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; }
else if (branch == 1) { branch = 2; }
if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; }
incPC(1);
incCycles(1);
mVUcount++;
}
// Second Pass
iPC = startPC;
iPC = mVUstartPC;
setCode();
for (bool x = 1; x; ) {
//
// ToDo: status/mac flag stuff?
//
if (isEOB) { x = 0; }
else if (isBranch) { mVUopU<vuIndex, 1>(); incPC(2); }
//if (isBranch2) { mVUopU<vuIndex, 1>(); incPC(2); }
if (isNop) { mVUopU<vuIndex, 1>(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
else if (!swapOps) { mVUopU<vuIndex, 1>(); incPC(1); mVUopL<vuIndex, 1>(); }
else { incPC(1); mVUopL<vuIndex, 1>(); incPC(-1); mVUopU<vuIndex, 1>(); incPC(1); }
mVUopU<vuIndex, 1>();
if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } }
else { incPC(1); mVUopL<vuIndex, 1>(); }
if (!isBdelay) { incPC(1); }
else {
incPC(-2); // Go back to Branch Opcode
mVUopL<vuIndex, 1>(); // Run Branch Opcode
u32* ajmp;
switch (mVUbranch) {
case 1: break;
case 2: break;
case 3: break;
case 3: branchCase(JZ32); // IBEQ
case 4: branchCase(JGE32); // IBGEZ
case 5: branchCase(JG32); // IBGTZ
case 6: branchCase(JLE32); // IBLEQ
case 7: branchCase(JL32); // IBLTZ
case 8: branchCase(JNZ32); // IBNEQ
case 2: branchCase2(); // BAL
case 1:
// search for block
ajmp = JMP32((uptr)0);
break; // B/BAL
case 9: branchCase2(); // JALR
case 10: break; // JR/JALR
//mVUcurProg.x86Ptr
}
break;
return thisPtr;
}
}
// Do E-bit end stuff here
incCycles(55); // Ensures Valid P/Q instances
mVUcycles -= 55;
if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ);
MOV32ItoM((uptr)&mVU->p, mVU->p);
MOV32ItoM((uptr)&mVU->q, mVU->q);
AND32ItoM((uptr)&microVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // clears VEW (bit 2: VIF stalled waiting for the VU program to end); zerorecs do the same
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC);
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
return thisPtr;
}
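// (On the E-bit epilogue above: xmmPQ holds both pipeline instances packed as
// Q in the low two dwords and P in the high two — the dispatcher's SHUFPS sets
// this up — so the PSHUFD with 0xe5 rotates the second Q instance into lane x
// before the MOVSS writeback, and the (mVU->p ? 3 : 2) shuffle selects the
// current P instance the same way.)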
#endif //PCSX2_MICROVU

View File

@ -0,0 +1,164 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2009 Pcsx2-Playground Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#ifdef PCSX2_MICROVU
//------------------------------------------------------------------
// Dispatcher Functions
//------------------------------------------------------------------
// Generates the code for entering recompiled blocks
microVUt(void) mVUdispatcherA() {
static u32 PCSX2_ALIGNED16(vuMXCSR);
microVU* mVU = mVUx;
x86SetPtr(mVU->ptr);
mVU->startFunct = mVU->ptr;
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); }
else { CALLFunc((uptr)mVUexecuteVU1); }
// Backup cpu state
PUSH32R(EBX);
PUSH32R(EBP);
PUSH32R(ESI);
PUSH32R(EDI);
// Load VU's MXCSR state
vuMXCSR = g_sseVUMXCSR;
SSE_LDMXCSR((uptr)&vuMXCSR);
// Load Regs
MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R]);
MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG]);
MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG]);
SHL32ItoR(gprF0, 16);
AND32ItoR(gprF1, 0xffff);
OR32RtoR (gprF0, gprF1);
MOV32RtoR(gprF1, gprF0);
MOV32RtoR(gprF2, gprF0);
MOV32RtoR(gprF3, gprF0);
for (int i = 0; i < 8; i++) {
MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1]);
}
SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC);
SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals);
SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals);
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]);
SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
// Jump to Recompiled Code Block
JMPR(EAX);
mVU->ptr = x86Ptr;
}
// Generates the code to exit from recompiled blocks
microVUt(void) mVUdispatcherB() {
static u32 PCSX2_ALIGNED16(eeMXCSR);
microVU* mVU = mVUx;
x86SetPtr(mVU->ptr);
mVU->exitFunct = mVU->ptr;
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); }
else { CALLFunc((uptr)mVUcleanUpVU1); }
// Load EE's MXCSR state
eeMXCSR = g_sseMXCSR;
SSE_LDMXCSR((uptr)&eeMXCSR);
// Save Regs
MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances
AND32ItoR(gprT1, 0xffff);
SHR32ItoR(gprF0, 16);
MOV32RtoM((uptr)&mVU->regs->VI[REG_R], gprR);
MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG], gprT1);
MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG], gprF0);
for (int i = 0; i < 8; i++) {
MOVDMMXtoM((uptr)&mVU->regs->VI[i+1], i);
}
SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC);
//SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances
//SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP
//SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ);
// Restore cpu state
POP32R(EDI);
POP32R(ESI);
POP32R(EBP);
POP32R(EBX);
EMMS();
RET();
mVU->ptr = x86Ptr;
mVUcachCheck(mVU->cache, 512);
}
//------------------------------------------------------------------
// Execution Functions
//------------------------------------------------------------------
// Executes for number of cycles
microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
/*
Pseudocode: (ToDo: implement # of cycles)
1) Search for existing program
2) If program not found, goto 5
3) Search for recompiled block
4) If recompiled block found, goto 6
5) Recompile as much blocks as possible
6) Return start execution address of block
*/
microVU* mVU = mVUx;
mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles);
if ( mVUsearchProg(mVU) ) { // Found Program
//microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState);
//if (block) return block->x86ptrStart; // Found Block
}
// Recompile code
return NULL;
}
//------------------------------------------------------------------
// Cleanup Functions
//------------------------------------------------------------------
microVUt(void) mVUcleanUp() {
microVU* mVU = mVUx;
mVU->ptr = mVUcurProg.x86ptr;
mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start));
}
//------------------------------------------------------------------
// Caller Functions
//------------------------------------------------------------------
void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.startFunct)(startPC, cycles); }
void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); }
void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); }
void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); }
void mVUcleanUpVU0() { mVUcleanUp<0>(); }
void mVUcleanUpVU1() { mVUcleanUp<1>(); }
#endif //PCSX2_MICROVU

View File

@ -23,6 +23,10 @@
// Micro VU Micromode Lower instructions
//------------------------------------------------------------------
//------------------------------------------------------------------
// DIV/SQRT/RSQRT
//------------------------------------------------------------------
#define testZero(xmmReg, xmmTemp, gprTemp) { \
SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \
SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \
@ -128,6 +132,10 @@ microVUf(void) mVU_RSQRT() {
}
}
//------------------------------------------------------------------
// EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM
//------------------------------------------------------------------
#define EATANhelper(addr) { \
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \
@ -401,6 +409,10 @@ microVUf(void) mVU_ESUM() {
}
}
//------------------------------------------------------------------
// FCAND/FCEQ/FCGET/FCOR/FCSET
//------------------------------------------------------------------
microVUf(void) mVU_FCAND() {
microVU* mVU = mVUx;
if (!recPass) {}
@ -456,6 +468,10 @@ microVUf(void) mVU_FCSET() {
}
}
//------------------------------------------------------------------
// FMAND/FMEQ/FMOR
//------------------------------------------------------------------
microVUf(void) mVU_FMAND() {
microVU* mVU = mVUx;
if (!recPass) {}
@ -491,9 +507,13 @@ microVUf(void) mVU_FMOR() {
}
}
//------------------------------------------------------------------
// FSAND/FSEQ/FSOR/FSSET
//------------------------------------------------------------------
microVUf(void) mVU_FSAND() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeSflag<vuIndex>(_Ft_); }
else {
mVUallocSFLAGa<vuIndex>(gprT1, fvsInstance);
AND16ItoR(gprT1, _Imm12_);
@ -503,7 +523,7 @@ microVUf(void) mVU_FSAND() {
microVUf(void) mVU_FSEQ() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeSflag<vuIndex>(_Ft_); }
else {
mVUallocSFLAGa<vuIndex>(gprT1, fvsInstance);
XOR16ItoR(gprT1, _Imm12_);
@ -515,7 +535,7 @@ microVUf(void) mVU_FSEQ() {
microVUf(void) mVU_FSOR() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeSflag<vuIndex>(_Ft_); }
else {
mVUallocSFLAGa<vuIndex>(gprT1, fvsInstance);
OR16ItoR(gprT1, _Imm12_);
@ -525,20 +545,22 @@ microVUf(void) mVU_FSOR() {
microVUf(void) mVU_FSSET() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeFSSET<vuIndex>(); }
else {
int flagReg;
getFlagReg(flagReg, fsInstance);
MOV16ItoR(gprT1, (_Imm12_ & 0xfc0));
//if (_Imm12_ & 0xc00) { mVUdivFlag = _Imm12_ >> 9; }
//else { mVUdivFlag = 1; }
//mVUdivFlagT = 4;
AND32ItoR(flagReg, 0x03f);
OR32ItoR(flagReg, (_Imm12_ & 0xfc0));
}
}
//------------------------------------------------------------------
// IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU
//------------------------------------------------------------------
microVUf(void) mVU_IADD() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU1<vuIndex>(_Fd_, _Fs_, _Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (_Ft_ != _Fs_) {
@ -552,7 +574,7 @@ microVUf(void) mVU_IADD() {
microVUf(void) mVU_IADDI() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU2<vuIndex>(_Fs_, _Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
ADD16ItoR(gprT1, _Imm5_);
@ -562,7 +584,7 @@ microVUf(void) mVU_IADDI() {
microVUf(void) mVU_IADDIU() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU2<vuIndex>(_Fs_, _Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
ADD16ItoR(gprT1, _Imm12_);
@ -572,7 +594,7 @@ microVUf(void) mVU_IADDIU() {
microVUf(void) mVU_IAND() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU1<vuIndex>(_Fd_, _Fs_, _Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (_Ft_ != _Fs_) {
@ -585,7 +607,7 @@ microVUf(void) mVU_IAND() {
microVUf(void) mVU_IOR() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU1<vuIndex>(_Fd_, _Fs_, _Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (_Ft_ != _Fs_) {
@ -598,7 +620,7 @@ microVUf(void) mVU_IOR() {
microVUf(void) mVU_ISUB() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU1<vuIndex>(_Fd_, _Fs_, _Ft_); }
else {
if (_Ft_ != _Fs_) {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
@ -615,7 +637,7 @@ microVUf(void) mVU_ISUB() {
microVUf(void) mVU_ISUBIU() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeIALU2<vuIndex>(_Fs_, _Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
SUB16ItoR(gprT1, _Imm12_);
@ -623,18 +645,13 @@ microVUf(void) mVU_ISUBIU() {
}
}
microVUf(void) mVU_MOVE() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ }
else {
mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
//------------------------------------------------------------------
// MFIR/MFP/MOVE/MR32/MTIR
//------------------------------------------------------------------
microVUf(void) mVU_MFIR() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); }
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOVSX32R16toR(gprT1, gprT1);
@ -646,25 +663,25 @@ microVUf(void) mVU_MFIR() {
microVUf(void) mVU_MFP() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
if (!recPass) { mVUanalyzeMFP<vuIndex>(_Ft_); }
else {
getPreg(xmmFt);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
microVUf(void) mVU_MTIR() {
microVUf(void) mVU_MOVE() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); }
else {
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
microVUf(void) mVU_MR32() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
if (!recPass) { mVUanalyzeMR32<vuIndex>(_Fs_, _Ft_); }
else {
mVUloadReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 4 : 15);
if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); }
@ -672,9 +689,22 @@ microVUf(void) mVU_MR32() {
}
}
microVUf(void) mVU_MTIR() {
microVU* mVU = mVUx;
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); }
else {
MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
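As a rough C model of the two int/float moves above (not the emitted x86; array layout assumed): MFIR sign-extends VI[is] into every enabled field of VF[ft], while MTIR copies the raw low 16 bits of the selected VF[fs] field into VI[it].
// Sketch: xyzw packs the dest mask as X=8, Y=4, Z=2, W=1.
static void vuMFIR(s16 vi_is, u32 vf_ft[4], int xyzw) {
	s32 val = vi_is;                            // sign-extend, as MOVSX32R16toR above
	for (int f = 0; f < 4; f++)
		if (xyzw & (8 >> f)) vf_ft[f] = (u32)val;
}
static u16 vuMTIR(const u32 vf_fs[4], int fsf) {
	return (u16)vf_fs[fsf];                     // raw bits, as MOVZX32M16toR above
}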
//------------------------------------------------------------------
// ILW/ILWR
//------------------------------------------------------------------
microVUf(void) mVU_ILW() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); }
else {
if (!_Fs_) {
MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS );
@ -684,7 +714,7 @@ microVUf(void) mVU_ILW() {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
ADD32ItoR(gprT1, _Imm11_);
mVUaddrFix<vuIndex>(gprT1);
MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
@ -693,25 +723,29 @@ microVUf(void) mVU_ILW() {
microVUf(void) mVU_ILWR() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); }
else {
if (!_Fs_) {
MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS );
MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUaddrFix<vuIndex>(gprT1);
MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works.
MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS);
if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
}
//------------------------------------------------------------------
// ISW/ISWR
//------------------------------------------------------------------
microVUf(void) mVU_ISW() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); }
else {
if (!_Fs_) {
int imm = getVUmem(_Imm11_);
@ -726,17 +760,17 @@ microVUf(void) mVU_ISW() {
mVUallocVIa<vuIndex>(gprT2, _Ft_);
ADD32ItoR(gprT1, _Imm11_);
mVUaddrFix<vuIndex>(gprT1);
if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
}
}
}
microVUf(void) mVU_ISWR() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); }
else {
if (!_Fs_) {
mVUallocVIa<vuIndex>(gprT1, _Ft_);
@ -749,17 +783,21 @@ microVUf(void) mVU_ISWR() {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
mVUaddrFix<vuIndex>(gprT1);
if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem);
if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem);
if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4);
if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8);
if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12);
}
}
}
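The integer loads/stores above all share one addressing scheme: VI[is] plus the signed 11-bit immediate gives a quadword index, which mVUaddrFix wraps to VU memory before the 32-bit field accesses. A hedged sketch for the VU1 case (16KB data RAM; VU0 differs, see the mVUaddrFix hunk further down):
// Sketch of the ILW path, assuming VU1's 16KB data RAM; offsetSS selects
// which of the X/Y/Z/W 32-bit fields the dest component maps to.
static u16 vu1ILW(const u8* vuMem, u16 vi_is, s16 imm11, u32 offsetSS) {
	u32 addr = ((u32)((vi_is + imm11) & 0x3ff)) * 16;  // quadword index -> byte address, wrapped
	return *(const u16*)(vuMem + addr + offsetSS);
}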
//------------------------------------------------------------------
// LQ/LQD/LQI
//------------------------------------------------------------------
microVUf(void) mVU_LQ() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
if (!recPass) { mVUanalyzeLQ<vuIndex>(_Ft_, _Fs_, 0); }
else {
if (!_Fs_) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W);
@ -777,9 +815,9 @@ microVUf(void) mVU_LQ() {
microVUf(void) mVU_LQD() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeLQ<vuIndex>(_Ft_, _Fs_, 1); }
else {
if (!_Fs_ && _Ft_) {
if (!_Fs_ && !noWriteVF) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
@ -787,7 +825,7 @@ microVUf(void) mVU_LQD() {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
SUB16ItoR(gprT1, 1);
mVUallocVIb<vuIndex>(gprT1, _Fs_); // ToDo: Backup to memory check.
if (_Ft_) {
if (!noWriteVF) {
mVUaddrFix<vuIndex>(gprT1);
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
@ -798,15 +836,15 @@ microVUf(void) mVU_LQD() {
microVUf(void) mVU_LQI() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeLQ<vuIndex>(_Ft_, _Fs_, 1); }
else {
if (!_Fs_ && _Ft_) {
if (!_Fs_ && !noWriteVF) {
mVUloadReg<vuIndex>(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W);
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
else {
mVUallocVIa<vuIndex>((_Ft_) ? gprT1 : gprT2, _Fs_);
if (_Ft_) {
if (!noWriteVF) {
MOV32RtoR(gprT2, gprT1);
mVUaddrFix<vuIndex>(gprT1);
mVUloadReg2<vuIndex>(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
@ -818,9 +856,13 @@ microVUf(void) mVU_LQI() {
}
}
//------------------------------------------------------------------
// SQ/SQD/SQI
//------------------------------------------------------------------
microVUf(void) mVU_SQ() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeSQ<vuIndex>(_Fs_, _Ft_, 0); }
else {
if (!_Ft_) {
getReg7(xmmFs, _Fs_);
@ -838,7 +880,7 @@ microVUf(void) mVU_SQ() {
microVUf(void) mVU_SQD() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeSQ<vuIndex>(_Fs_, _Ft_, 1); }
else {
if (!_Ft_) {
getReg7(xmmFs, _Fs_);
@ -857,7 +899,7 @@ microVUf(void) mVU_SQD() {
microVUf(void) mVU_SQI() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeSQ<vuIndex>(_Fs_, _Ft_, 1); }
else {
if (!_Ft_) {
getReg7(xmmFs, _Fs_);
@ -875,9 +917,13 @@ microVUf(void) mVU_SQI() {
}
}
//------------------------------------------------------------------
// RINIT/RGET/RNEXT/RXOR
//------------------------------------------------------------------
microVUf(void) mVU_RINIT() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
else {
if (_Fs_ || (_Fsf_ == 3)) {
getReg8(gprR, _Fs_, _Fsf_);
@ -890,7 +936,7 @@ microVUf(void) mVU_RINIT() {
microVUt(void) mVU_RGET_() {
microVU* mVU = mVUx;
if (_Ft_) {
if (!noWriteVF) {
if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR);
if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR);
if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR);
@ -900,13 +946,13 @@ microVUt(void) mVU_RGET_() {
microVUf(void) mVU_RGET() {
microVU* mVU = mVUx;
if (!recPass) { /*if (!_Ft_) nop();*/ }
if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 1); }
else { mVU_RGET_<vuIndex>(); }
}
microVUf(void) mVU_RNEXT() {
microVU* mVU = mVUx;
if (!recPass) { /*if (!_Ft_) nop();*/ }
if (!recPass) { mVUanalyzeR2<vuIndex>(_Ft_, 0); }
else {
// algorithm from www.project-fao.org
MOV32RtoR(gprT1, gprR);
@ -928,7 +974,7 @@ microVUf(void) mVU_RNEXT() {
microVUf(void) mVU_RXOR() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeR1<vuIndex>(_Fs_, _Fsf_); }
else {
if (_Fs_ || (_Fsf_ == 3)) {
getReg8(gprT1, _Fs_, _Fsf_);
@ -938,21 +984,27 @@ microVUf(void) mVU_RXOR() {
}
}
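The RNEXT body is cut off by the hunk above; the usual software model of the VU R-register sequence (as in VU interpreters, assumed here) is a small LFSR on bits 4 and 22, with the result forced into float range:
// Sketch (assumed model) of one RNEXT step on the 32-bit R register.
static u32 vuRNext(u32 R) {
	u32 x = (R >> 4) & 1;
	u32 y = (R >> 22) & 1;
	R <<= 1;
	R ^= x ^ y;
	return (R & 0x007fffff) | 0x3f800000;  // keep mantissa, force 1.0f <= R < 2.0f
}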
//------------------------------------------------------------------
// WaitP/WaitQ
//------------------------------------------------------------------
microVUf(void) mVU_WAITP() {
microVU* mVU = mVUx;
if (!recPass) {}
else {}
if (!recPass) { mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); }
}
microVUf(void) mVU_WAITQ() {
microVU* mVU = mVUx;
if (!recPass) {}
else {}
if (!recPass) { mVUstall = aMax(mVUstall, mVUregs.q); }
}
//------------------------------------------------------------------
// XTOP/XITOP
//------------------------------------------------------------------
microVUf(void) mVU_XTOP() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); }
else {
MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
@ -961,13 +1013,17 @@ microVUf(void) mVU_XTOP() {
microVUf(void) mVU_XITOP() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); }
else {
MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop );
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
//------------------------------------------------------------------
// XGkick
//------------------------------------------------------------------
microVUt(void) __fastcall mVU_XGKICK_(u32 addr) {
microVU* mVU = mVUx;
u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff));
@ -981,61 +1037,106 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); }
microVUf(void) mVU_XGKICK() {
microVU* mVU = mVUx;
if (!recPass) {}
if (!recPass) { mVUanalyzeXGkick<vuIndex>(_Fs_, 4); }
else {
mVUallocVIa<vuIndex>(gprT2, _Fs_); // gprT2 = ECX for __fastcall
PUSH32R(gprR); // gprR = EDX is volatile so backup
if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0);
else CALLFunc((uptr)mVU_XGKICK1);
POP32R(gprR); // Restore
}
}
//------------------------------------------------------------------
// Branches
// Branches/Jumps
//------------------------------------------------------------------
microVUf(void) mVU_B() {
microVU* mVU = mVUx;
mVUbranch = 1;
if (!recPass) { /*mVUinfo |= _isBranch2;*/ }
}
microVUf(void) mVU_BAL() {
microVU* mVU = mVUx;
mVUbranch = 1;
if (recPass) {
MOV32ItoR(gprT1, (xPC + (2 * 8)) & 0xffff);
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
mVUbranch = 2;
if (!recPass) { /*mVUinfo |= _isBranch2;*/ analyzeVIreg2(_Ft_, 1); }
else {}
}
microVUf(void) mVU_IBEQ() {
microVU* mVU = mVUx;
mVUbranch = 2;
mVUbranch = 3;
if (!recPass) { mVUanalyzeBranch2<vuIndex>(_Fs_, _Ft_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else { mVUallocVIa<vuIndex>(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); }
MOV32RtoM((uptr)mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBGEZ() {
microVU* mVU = mVUx;
mVUbranch = 2;
mVUbranch = 4;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
//SHR32ItoR(gprT1, 15);
MOV32RtoM((uptr)mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBGTZ() {
microVU* mVU = mVUx;
mVUbranch = 2;
}
microVUf(void) mVU_IBLTZ() {
microVU* mVU = mVUx;
mVUbranch = 2;
mVUbranch = 5;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOV32RtoM((uptr)mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBLEZ() {
microVU* mVU = mVUx;
mVUbranch = 2;
mVUbranch = 6;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
MOV32RtoM((uptr)mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBLTZ() {
microVU* mVU = mVUx;
mVUbranch = 7;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
//SHR32ItoR(gprT1, 15);
MOV32RtoM((uptr)mVU->branch, gprT1);
}
}
microVUf(void) mVU_IBNE() {
microVU* mVU = mVUx;
mVUbranch = 2;
mVUbranch = 8;
if (!recPass) { mVUanalyzeBranch2<vuIndex>(_Fs_, _Ft_); }
else {
if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else mVUallocVIa<vuIndex>(gprT1, _Fs_);
if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]);
else { mVUallocVIa<vuIndex>(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); }
MOV32RtoM((uptr)mVU->branch, gprT1);
}
}
microVUf(void) mVU_JR() {
microVU* mVU = mVUx;
mVUbranch = 3;
mVUbranch = 9;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); }
}
microVUf(void) mVU_JALR() {
microVU* mVU = mVUx;
mVUbranch = 3;
mVUbranch = 10;
if (!recPass) { mVUanalyzeBranch1<vuIndex>(_Fs_); analyzeVIreg2(_Ft_, 1); }
}
#endif //PCSX2_MICROVU

View File

@ -66,9 +66,9 @@ declareAllVariables
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define _Ft_ ((mVU->code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((mVU->code >> 11) & 0x1F) // The rd part of the instruction register
#define _Fd_ ((mVU->code >> 6) & 0x1F) // The sa part of the instruction register
#define _Ft_ ((mVU->code >> 16) & 0x1F) // The ft/it part of the instruction register
#define _Fs_ ((mVU->code >> 11) & 0x1F) // The fs/is part of the instruction register
#define _Fd_ ((mVU->code >> 6) & 0x1F) // The fd/id part of the instruction register
#define _X ((mVU->code>>24) & 0x1)
#define _Y ((mVU->code>>23) & 0x1)
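The renamed comments use VU terminology: bits 16-20 hold ft/it, 11-15 fs/is, 6-10 fd/id, and bits 21-24 are the W/Z/Y/X dest-mask bits. A decode sketch of the same layout:
// Sketch: decoding the fields of a 32-bit VU instruction word.
static void vuDecode(u32 code) {
	u32 ft = (code >> 16) & 0x1F;  // ft (float) or it (integer) register
	u32 fs = (code >> 11) & 0x1F;  // fs / is
	u32 fd = (code >>  6) & 0x1F;  // fd / id
	u32 x  = (code >> 24) & 1;     // dest.x (y = bit 23, z = 22, w = 21)
	(void)ft; (void)fs; (void)fd; (void)x;
}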
@ -143,20 +143,25 @@ declareAllVariables
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
#define mVUbranch mVUallocInfo.branch
#define mVUcycles mVUallocInfo.cycles
#define mVUcount mVUallocInfo.count
#define mVUstall mVUallocInfo.maxStall
#define mVUdivFlag mVUallocInfo.divFlag
#define mVUdivFlagT mVUallocInfo.divFlagTimer
#define mVUregs mVUallocInfo.regs
#define mVUregsTemp mVUallocInfo.regsTemp
#define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2]
#define mVUstartPC mVUallocInfo.startPC
#define iPC mVUallocInfo.curPC
#define xPC ((iPC / 2) * 8)
#define incCycles(x) { mVUcycles += x; }
#define curI mVUcurProg.data[iPC]
#define setCode() { mVU->code = curI; }
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
#define incCycles(x) { mVUincCycles<vuIndex>(x); }
#define _isNOP (1<<0) // Skip Lower Instruction
#define _isBranch (1<<1) // Cur Instruction is a Branch
#define _isEOB (1<<2) // End of Block
#define _isBdelay (1<<3) // Cur Instruction in Branch Delay slot
#define _isSflag (1<<4) // Cur Instruction uses status flag
#define _writeQ (1<<5)
#define _readQ (1<<6)
#define _writeP (1<<7)
@ -166,17 +171,25 @@ declareAllVariables
#define _doStatus (1<<9)
#define _fmInstance (3<<10)
#define _fsInstance (3<<12)
#define _fcInstance (3<<14)
#define _fpmInstance (3<<10)
#define _fpsInstance (3<<12)
#define _fcInstance (3<<14)
#define _fpcInstance (3<<14)
#define _fvmInstance (3<<16)
#define _fvsInstance (3<<18)
#define _fvcInstance (3<<14)
#define _fvcInstance (3<<20)
#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
#define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
#define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches)
#define _memReadIt (1<<24) // Read It (VI reg) from memory (used by branches)
#define _writesVI (1<<25) // Current Instruction writes to VI
#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction
//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR)
#define isNOP (mVUinfo & (1<<0))
#define isBranch (mVUinfo & (1<<1))
#define isEOB (mVUinfo & (1<<2))
#define isBdelay (mVUinfo & (1<<3))
#define isSflag (mVUinfo & (1<<4))
#define writeQ ((mVUinfo >> 5) & 1)
#define readQ ((mVUinfo >> 6) & 1)
#define writeP ((mVUinfo >> 7) & 1)
@ -192,11 +205,32 @@ declareAllVariables
#define fvmInstance ((mVUinfo >> 16) & 3)
#define fvsInstance ((mVUinfo >> 18) & 3)
#define fvcInstance ((mVUinfo >> 20) & 3)
//#define getFs (mVUinfo & (1<<13))
//#define getFt (mVUinfo & (1<<14))
//#define fpmInstance (((u8)((mVUinfo & (3<<10)) >> 10) - 1) & 0x3)
#define noWriteVF (mVUinfo & (1<<21))
#define backupVI (mVUinfo & (1<<22))
#define memReadIs (mVUinfo & (1<<23))
#define memReadIt (mVUinfo & (1<<24))
#define writesVI (mVUinfo & (1<<25))
#define swapOps (mVUinfo & (1<<26))
//#define isBranch2 (mVUinfo & (1<<27))
#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9)
#define mmVI(_VIreg_) (_VIreg_ - 1)
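All of the flags above share one 32-bit mVUinfo word per instruction: single-bit flags are OR'd in during the analysis pass and masked back out in the recompile pass, while the 2-bit instance fields are read with a shift-and-mask. The pattern, sketched:
// Sketch of the packing pattern (using _fsInstance at bits 12-13 as the example).
static u32 getFsInst(u32 info)        { return (info >> 12) & 3; }
static u32 setFsInst(u32 info, u32 n) { return (info & ~(3u << 12)) | ((n & 3) << 12); }
// single-bit flags: set with info |= _isNOP; test with (info & _isNOP).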
#ifdef mVUdebug
#define mVUlog Console::Notice
#define mVUdebug1() { \
if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \
if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \
if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \
if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \
if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \
}
#else
#define mVUlog 0&&
#define mVUdebug1() {}
#endif
#define mVUcachCheck(start, limit) { \
uptr diff = mVU->ptr - start; \
if (diff >= limit) { Console::Error("microVU Error: Program went over its cache limit. Size = %x", params diff); } \
}

View File

@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) {
microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
switch( xyzw ) {
case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X
case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y
case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z
case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W
default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break;
case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X
case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y
case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z
case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W
default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break;
}
}
@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
switch ( xyzw ) {
case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1);
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // YW
case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
break; // YZ
case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // YZW
case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1);
else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // XW
case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; //XZ
case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg);
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg);
case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8);
break; //XZW
case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW
SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1);
SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12);
break; // XYW
case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg);
SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1);
SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset);
SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8);
break; // XYZ
case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X
case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y
case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z
case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W
case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY
case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW
default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW
case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X
case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y
case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // Z
case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W
case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY
case 3: SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW
default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW
}
}
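All of these cases key off the same xyzw mask (X=8, Y=4, Z=2, W=1), so e.g. case 12 stores XY with a single MOVLPS and case 3 stores ZW with a single MOVHPS, while the shuffle-heavy cases cover non-contiguous field sets. A trivial decode sketch:
// Sketch: expand an xyzw mask into per-component enables.
static void decodeXYZW(int xyzw, bool on[4]) {
	for (int f = 0; f < 4; f++)
		on[f] = (xyzw >> (3 - f)) & 1;  // on[0]=X ... on[3]=W
}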
@ -251,7 +251,7 @@ microVUt(void) mVUaddrFix(int gprReg) {
u8 *jmpA, *jmpB;
CMP32ItoR(EAX, 0x400);
jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs
AND32ItoR(EAX, 0x43f);
AND32ItoR(EAX, 0x43f); // ToDo: there's a potential problem if VU0 overrides VU1's VF0/VI0 regs!
jmpB = JMP8(0);
x86SetJ8(jmpA);
AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around
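In C, the VU0 branch of mVUaddrFix above amounts to (a sketch; addr is a quadword index):
static u32 vu0AddrFix(u32 addr) {
	if (addr >= 0x400) return addr & 0x43f;  // 0x4000+ bytes: maps to VU1's VF/VI register space
	return addr & 0xff;                      // otherwise wrap inside VU0's 4KB data RAM
}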

View File

@ -26,7 +26,7 @@
#pragma once
#define PLUGIN_VERSION 14
#define PLUGIN_VERSION 15
#include "GSVector.h"

View File

@ -349,8 +349,6 @@ protected:
OverrideOutput();
m_tc->InvalidateTextures(context->FRAME, context->ZBUF);
if(s_dump)
{
CString str;
@ -360,6 +358,8 @@ protected:
if(s_savez) ds->m_texture.Save(str);
// if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str); // TODO
}
m_tc->InvalidateTextures(context->FRAME, context->ZBUF);
}
virtual void Draw(int prim, Texture& rt, Texture& ds, typename GSTextureCache<Device>::GSTexture* tex) = 0;
@ -507,6 +507,35 @@ protected:
#pragma endregion
#pragma region GoW2 z buffer clear
if(m_game.title == CRC::GodOfWar2)
{
DWORD FBP = m_context->FRAME.Block();
DWORD FBW = m_context->FRAME.FBW;
DWORD FPSM = m_context->FRAME.PSM;
if((FBP == 0x00f00 || FBP == 0x00100) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100
{
GIFRegTEX0 TEX0;
TEX0.TBP0 = FBP;
TEX0.TBW = FBW;
TEX0.PSM = FPSM;
if(GSTextureCache<Device>::GSDepthStencil* ds = m_tc->GetDepthStencil(TEX0, m_width, m_height))
{
m_dev.ClearDepth(ds->m_texture, 0);
}
return false;
}
return true;
}
#pragma endregion
return true;
}

View File

@ -2081,6 +2081,27 @@ bool GSC_GodOfWar(const GSFrameInfo& fi, int& skip)
return true;
}
bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
{
if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16 // ntsc
|| fi.TME && fi.FBP == 0x02100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT16) // pal
{
skip = 30; // shadows
}
else if(fi.TME && fi.FBP == 0x00500 && fi.FPSM == PSM_PSMCT24 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT32) // pal
{
// skip = 17; // only looks correct at native resolution
}
}
else
{
}
return true;
}
bool GSC_GiTS(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
@ -2172,7 +2193,7 @@ bool GSState::IsBadFrame(int& skip)
map[CRC::Tekken5] = GSC_Tekken5;
map[CRC::IkkiTousen] = GSC_IkkiTousen;
map[CRC::GodOfWar] = GSC_GodOfWar;
map[CRC::GodOfWar2] = GSC_GodOfWar;
map[CRC::GodOfWar2] = GSC_GodOfWar2;
map[CRC::GiTS] = GSC_GiTS;
map[CRC::Onimusha3] = GSC_Onimusha3;
map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss;

View File

@ -140,7 +140,7 @@ void GSTexture9::Unmap()
bool GSTexture9::Save(CString fn, bool dds)
{
CComPtr<IDirect3DResource9> res;
CComPtr<IDirect3DSurface9> surface;
if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL)
{
@ -153,8 +153,6 @@ bool GSTexture9::Save(CString fn, bool dds)
if(desc.Format != D3DFMT_D32F_LOCKABLE)
return false;
CComPtr<IDirect3DSurface9> surface;
hr = m_dev->CreateOffscreenPlainSurface(desc.Width, desc.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, NULL);
D3DLOCKED_RECT slr, dlr;
@ -175,24 +173,22 @@ bool GSTexture9::Save(CString fn, bool dds)
m_surface->UnlockRect();
surface->UnlockRect();
res = surface;
}
else
{
res = m_surface;
surface = m_surface;
}
if(CComQIPtr<IDirect3DSurface9> surface = res)
if(surface != NULL)
{
return SUCCEEDED(D3DXSaveSurfaceToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, surface, NULL, NULL));
}
if(CComQIPtr<IDirect3DTexture9> texture = res)
/*
if(CComQIPtr<IDirect3DTexture9> texture = surface)
{
return SUCCEEDED(D3DXSaveTextureToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, texture, NULL));
}
*/
return false;
}

View File

@ -1404,18 +1404,59 @@ INT_PTR CALLBACK DialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, LPARAM l
if (i >= 0) {
unsigned int index = (unsigned int)SendMessage(GetDlgItem(hWnd, IDC_FORCEFEEDBACK), CB_GETITEMDATA, i, 0);
if (index < (unsigned int) dm->numDevices) {
Device *dev = dm->devices[index];
ForceFeedbackBinding *b;
int count = CreateEffectBinding(dm->devices[index], 0, port, slot, cmd-ID_BIG_MOTOR, &b);
int count = CreateEffectBinding(dev, 0, port, slot, cmd-ID_BIG_MOTOR, &b);
if (b) {
for (int j=0; j<2 && j <dm->devices[index]->numFFAxes; j++) {
int needSet = 1;
if (dev->api == XINPUT && dev->numFFAxes == 2) {
needSet = 0;
if (cmd == ID_BIG_MOTOR) {
b->axes[0].force = BASE_SENSITIVITY;
}
else {
b->axes[1].force = BASE_SENSITIVITY;
}
}
else if (dev->api == DI) {
int bigIndex=0, littleIndex=0;
int constantEffect = 0, squareEffect = 0;
int j;
for (j=0; j<dev->numFFAxes; j++) {
// DI object instance. 0 is x-axis, 1 is y-axis.
int instance = (dev->ffAxes[j].id>>8)&0xFFFF;
if (instance == 0) {
bigIndex = j;
}
else if (instance == 1) {
littleIndex = j;
}
}
for (j=0; j<dev->numFFEffectTypes; j++) {
if (!wcsicmp(L"13541C20-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) constantEffect = j;
if (!wcsicmp(L"13541C22-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) squareEffect = j;
}
needSet = 0;
if (cmd == ID_BIG_MOTOR) {
b->axes[bigIndex].force = BASE_SENSITIVITY;
b->axes[littleIndex].force = 1;
b->effectIndex = constantEffect;
}
else {
b->axes[bigIndex].force = 1;
b->axes[littleIndex].force = BASE_SENSITIVITY;
b->effectIndex = squareEffect;
}
}
if (needSet) {
for (int j=0; j<2 && j <dev->numFFAxes; j++) {
b->axes[j].force = BASE_SENSITIVITY;
}
}
if (count >= 0) {
PropSheet_Changed(hWndProp, hWnd);
UnselectAll(hWndList);
ListView_SetItemState(hWndList, count, LVIS_SELECTED, LVIS_SELECTED);
}
PropSheet_Changed(hWndProp, hWnd);
}
}
}
@ -1867,7 +1908,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L
InsertMenuItemW(hMenu, index, 1, &info);
}
else {
info.wID = port2+2*slot2;
info.wID = port2+2*slot2+1;
wsprintfW(text, L"Swap with %s", pad);
InsertMenuItemW(hMenu, 0, 1, &info);
}
@ -1879,12 +1920,14 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L
DestroyMenu(hMenu);
if (!res) break;
if (res > 0) {
res--;
slot2 = res / 2;
port2 = res&1;
PadConfig padCfgTemp = config.padConfigs[port1][slot1];
config.padConfigs[port1][slot1] = config.padConfigs[port2][slot2];
config.padConfigs[port2][slot2] = padCfgTemp;
for (int i=0; i<dm->numDevices; i++) {
if (dm->devices[i]->type == IGNORE) continue;
PadBindings bindings = dm->devices[i]->pads[port1][slot1];
dm->devices[i]->pads[port1][slot1] = dm->devices[i]->pads[port2][slot2];
dm->devices[i]->pads[port2][slot2] = bindings;
@ -1892,6 +1935,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L
}
else {
for (int i=0; i<dm->numDevices; i++) {
if (dm->devices[i]->type == IGNORE) continue;
free(dm->devices[i]->pads[port1][slot1].bindings);
for (int j=0; j<dm->devices[i]->pads[port1][slot1].numFFBindings; j++) {
free(dm->devices[i]->pads[port1][slot1].ffBindings[j].axes);

View File

@ -1,13 +1,16 @@
// This is undoubtedly completely unnecessary.
#include "KeyboardQueue.h"
static int numQueuedEvents = 0;
static keyEvent queuedEvents[20];
// What MS calls a single process Mutex. Faster, supposedly.
// More importantly, can be abbreviated, amusingly, as cSection.
static CRITICAL_SECTION cSection;
static int csInitialized = 0;
static u8 csInitialized = 0;
#define EVENT_QUEUE_LEN 16
// Actually points one beyond the last queued event.
static u8 lastQueuedEvent = 0;
static u8 nextQueuedEvent = 0;
static keyEvent queuedEvents[EVENT_QUEUE_LEN];
void QueueKeyEvent(int key, int event) {
if (!csInitialized) {
@ -15,50 +18,42 @@ void QueueKeyEvent(int key, int event) {
InitializeCriticalSection(&cSection);
}
EnterCriticalSection(&cSection);
if (numQueuedEvents >= 15) {
// Generally shouldn't happen.
for (int i=0; i<15; i++) {
queuedEvents[i] = queuedEvents[i+5];
}
numQueuedEvents = 15;
}
int index = numQueuedEvents;
// Move escape to top of queue. May do something
// Don't queue events if escape is on top of queue. This is just for safety
// purposes when a game is killing the emulator for whatever reason.
if (nextQueuedEvent == lastQueuedEvent ||
queuedEvents[nextQueuedEvent].key != VK_ESCAPE ||
queuedEvents[nextQueuedEvent].evt != KEYPRESS) {
// Clear queue on escape down, bringing escape to front. May do something
// with shift/ctrl/alt and F-keys, later.
if (event == KEYPRESS && key == VK_ESCAPE) {
while (index) {
queuedEvents[index-1] = queuedEvents[index];
index--;
nextQueuedEvent = lastQueuedEvent;
}
queuedEvents[lastQueuedEvent].key = key;
queuedEvents[lastQueuedEvent].evt = event;
lastQueuedEvent = (lastQueuedEvent + 1) % EVENT_QUEUE_LEN;
// If queue wrapped around, remove last element.
if (nextQueuedEvent == lastQueuedEvent) {
nextQueuedEvent = (nextQueuedEvent + 1) % EVENT_QUEUE_LEN;
}
}
queuedEvents[index].key = key;
queuedEvents[index].evt = event;
numQueuedEvents ++;
LeaveCriticalSection(&cSection);
}
int GetQueuedKeyEvent(keyEvent *event) {
int out = 0;
if (numQueuedEvents) {
if (lastQueuedEvent == nextQueuedEvent) return 0;
EnterCriticalSection(&cSection);
// Shouldn't be 0, but just in case...
if (numQueuedEvents) {
*event = queuedEvents[0];
numQueuedEvents--;
out = 1;
for (int i=0; i<numQueuedEvents; i++) {
queuedEvents[i] = queuedEvents[i+1];
}
}
*event = queuedEvents[nextQueuedEvent];
nextQueuedEvent = (nextQueuedEvent + 1) % EVENT_QUEUE_LEN;
LeaveCriticalSection(&cSection);
}
return out;
return 1;
}
void ClearKeyQueue() {
if (numQueuedEvents) {
numQueuedEvents = 0;
}
lastQueuedEvent = nextQueuedEvent;
if (csInitialized) {
DeleteCriticalSection(&cSection);
csInitialized = 0;

View File

@ -23,7 +23,7 @@
#endif
// LilyPad version.
#define VERSION ((0<<8) | 9 | (11<<24))
#define VERSION ((0<<8) | 10 | (0<<24))
// Used to prevent reading input and cleaning up input devices at the same time.
// Only an issue when not reading input in GS thread and disabling devices due to
@ -115,7 +115,7 @@ struct ButtonSum {
Stick sticks[3];
};
// Freeze data, for a single pad.
struct PadFreezeData {
// Digital / Analog / DS2 Native
u8 mode;
@ -168,6 +168,12 @@ u8 Cap (int i) {
return (u8) i;
}
inline void ReleaseModifierKeys() {
QueueKeyEvent(VK_SHIFT, KEYRELEASE);
QueueKeyEvent(VK_MENU, KEYRELEASE);
QueueKeyEvent(VK_CONTROL, KEYRELEASE);
}
// RefreshEnabledDevices() enables everything that can potentially
// be bound to, as well as the "Ignore keyboard" device.
//
@ -677,9 +683,7 @@ s32 CALLBACK PADinit(u32 flags) {
query.numBytes = 0;
ClearKeyQueue();
// Just in case, when resuming emulation.
QueueKeyEvent(VK_SHIFT, KEYRELEASE);
QueueKeyEvent(VK_MENU, KEYRELEASE);
QueueKeyEvent(VK_CONTROL, KEYRELEASE);
ReleaseModifierKeys();
return 0;
}
@ -756,9 +760,7 @@ ExtraWndProcResult HackWndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lPara
case WM_ACTIVATEAPP:
// Release any buttons PCSX2 may think are down when
// losing/gaining focus.
QueueKeyEvent(VK_SHIFT, KEYRELEASE);
QueueKeyEvent(VK_MENU, KEYRELEASE);
QueueKeyEvent(VK_CONTROL, KEYRELEASE);
ReleaseModifierKeys();
// Need to do this when not reading input from gs thread.
// Checking for that case not worth the effort.
@ -1227,12 +1229,14 @@ DWORD WINAPI RenameWindowThreadProc(void *lpParameter) {
}
keyEvent* CALLBACK PADkeyEvent() {
// If running both pads, ignore every other call. So if two keys are pressed in the same interval...
static char eventCount = 0;
eventCount++;
if (eventCount < openCount) {
return 0;
}
eventCount = 0;
if (!config.GSThreadUpdates) {
Update(2, 0);
}
@ -1327,7 +1331,7 @@ s32 CALLBACK PADfreeze(int mode, freezeData *data) {
break;
}
// Note sure if the cast is strictly necessary, but feel safest with it there...
// Not sure if the cast is strictly necessary, but feel safest with it there...
*(PadFreezeData*)&pads[port][slot] = pdata.padData[slot];
}
if (pdata.slot < 4)

View File

@ -150,11 +150,10 @@ public:
}
void Deactivate() {
if (xInputVibration.wLeftMotorSpeed || xInputVibration.wRightMotorSpeed) {
memset(&xInputVibration, 0, sizeof(xInputVibration));
pXInputSetState(index, &xInputVibration);
}
memset(ps2Vibration, 0, sizeof(ps2Vibration));
pXInputSetState(index, &xInputVibration);
FreeState();
if (active) {
if (!--xInputActiveCount) {