Added CMOV to the emitter, renamed x86Struct stuff to iStruct, renamed the XMMREGS / X86REGS / MMXREGS defines to iREGCNT_XMM / iREGCNT_GPR / iREGCNT_MMX, and undid a couple of u32 optimizations that could have caused unexpected behavior in the future, if we ever decided to employ some particularly obscure case of self-modifying code.
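A call-site sketch of the rename, plus the new CMOV front-end:

    for (i=0; i<XMMREGS; i++)       // before
    for (i=0; i<iREGCNT_XMM; i++)   // after

    iCMOVZ( eax, ecx );             // cmovz eax, ecx
    iCMOV( Jcc_Below, eax, ecx );   // generic form; condition chosen at runtime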

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@985 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-04-15 21:00:32 +00:00
parent 4d2adcae9f
commit f228a91c93
19 changed files with 1014 additions and 1002 deletions

View File

@ -2925,6 +2925,18 @@
RelativePath="..\..\x86\ix86\ix86_fpu.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_impl_group1.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_impl_group2.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_impl_movs.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_inlines.inl"
>

View File

@ -40,10 +40,10 @@ u32 g_recWriteback = 0;
char g_globalXMMLocked = 0;
#endif
_xmmregs xmmregs[XMMREGS], s_saveXMMregs[XMMREGS];
_xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM];
// X86 caching
_x86regs x86regs[X86REGS], s_saveX86regs[X86REGS];
_x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
#include <vector>
using namespace std;
@ -119,16 +119,16 @@ int _getFreeXMMreg()
int i, tempi;
u32 bestcount = 0x10000;
for (i=0; i<XMMREGS; i++) {
if (xmmregs[(i+s_xmmchecknext)%XMMREGS].inuse == 0) {
int ret = (s_xmmchecknext+i)%XMMREGS;
s_xmmchecknext = (s_xmmchecknext+i+1)%XMMREGS;
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[(i+s_xmmchecknext)%iREGCNT_XMM].inuse == 0) {
int ret = (s_xmmchecknext+i)%iREGCNT_XMM;
s_xmmchecknext = (s_xmmchecknext+i+1)%iREGCNT_XMM;
return ret;
}
}
// check for dead regs
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !(g_pCurInstInfo->regs[xmmregs[i].reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) ) {
@ -139,7 +139,7 @@ int _getFreeXMMreg()
}
// check for future xmm usage
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !(g_pCurInstInfo->regs[xmmregs[i].reg] & EEINST_XMM) ) {
@ -151,7 +151,7 @@ int _getFreeXMMreg()
tempi = -1;
bestcount = 0xffff;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type != XMMTYPE_TEMP) {
@ -196,7 +196,7 @@ int _allocVFtoXMMreg(VURegs *VU, int xmmreg, int vfreg, int mode) {
int i;
int readfromreg = -1;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if ((xmmregs[i].inuse == 0) || (xmmregs[i].type != XMMTYPE_VFREG) ||
(xmmregs[i].reg != vfreg) || (xmmregs[i].VU != XMM_CONV_VU(VU)))
continue;
@ -250,7 +250,7 @@ int _checkXMMreg(int type, int reg, int mode)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && (xmmregs[i].type == (type&0xff)) && (xmmregs[i].reg == reg)) {
if ( !(xmmregs[i].mode & MODE_READ) ) {
@ -279,7 +279,7 @@ int _allocACCtoXMMreg(VURegs *VU, int xmmreg, int mode) {
int i;
int readfromreg = -1;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_ACC) continue;
if (xmmregs[i].VU != XMM_CONV_VU(VU) ) continue;
@ -335,7 +335,7 @@ int _allocACCtoXMMreg(VURegs *VU, int xmmreg, int mode) {
int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode) {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPREG) continue;
if (xmmregs[i].reg != fpreg) continue;
@ -372,7 +372,7 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
{
int i;
for (i=0; i<XMMREGS; i++)
for (i=0; i<iREGCNT_XMM; i++)
{
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_GPRREG) continue;
@ -478,7 +478,7 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPACC) continue;
@ -516,7 +516,7 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode)
void _addNeededVFtoXMMreg(int vfreg) {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_VFREG) continue;
if (xmmregs[i].reg != vfreg) continue;
@ -530,7 +530,7 @@ void _addNeededGPRtoXMMreg(int gprreg)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_GPRREG) continue;
if (xmmregs[i].reg != gprreg) continue;
@ -544,7 +544,7 @@ void _addNeededGPRtoXMMreg(int gprreg)
void _addNeededACCtoXMMreg() {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_ACC) continue;
@ -557,7 +557,7 @@ void _addNeededACCtoXMMreg() {
void _addNeededFPtoXMMreg(int fpreg) {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPREG) continue;
if (xmmregs[i].reg != fpreg) continue;
@ -571,7 +571,7 @@ void _addNeededFPtoXMMreg(int fpreg) {
void _addNeededFPACCtoXMMreg() {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
if (xmmregs[i].type != XMMTYPE_FPACC) continue;
@ -584,7 +584,7 @@ void _addNeededFPACCtoXMMreg() {
void _clearNeededXMMregs() {
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if( xmmregs[i].needed ) {
@ -605,7 +605,7 @@ void _deleteVFtoXMMreg(int reg, int vu, int flush)
int i;
VURegs *VU = vu ? &VU1 : &VU0;
for (i=0; i<XMMREGS; i++)
for (i=0; i<iREGCNT_XMM; i++)
{
if (xmmregs[i].inuse && (xmmregs[i].type == XMMTYPE_VFREG) &&
(xmmregs[i].reg == reg) && (xmmregs[i].VU == vu))
@ -627,13 +627,13 @@ void _deleteVFtoXMMreg(int reg, int vu, int flush)
// xyz, don't destroy w
int t0reg;
for (t0reg = 0; t0reg < XMMREGS; ++t0reg)
for (t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg)
{
if (!xmmregs[t0reg].inuse )
break;
}
if (t0reg < XMMREGS )
if (t0reg < iREGCNT_XMM )
{
SSE_MOVHLPS_XMM_to_XMM(t0reg, i);
SSE_MOVLPS_XMM_to_M64(VU_VFx_ADDR(xmmregs[i].reg), i);
@ -675,7 +675,7 @@ void _deleteACCtoXMMreg(int vu, int flush)
int i;
VURegs *VU = vu ? &VU1 : &VU0;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && (xmmregs[i].type == XMMTYPE_ACC) && (xmmregs[i].VU == vu)) {
switch(flush) {
@ -691,11 +691,11 @@ void _deleteACCtoXMMreg(int vu, int flush)
if( xmmregs[i].mode & MODE_VUZ ) {
// xyz, don't destroy w
int t0reg;
for(t0reg = 0; t0reg < XMMREGS; ++t0reg ) {
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) {
if( !xmmregs[t0reg].inuse ) break;
}
if( t0reg < XMMREGS ) {
if( t0reg < iREGCNT_XMM ) {
SSE_MOVHLPS_XMM_to_XMM(t0reg, i);
SSE_MOVLPS_XMM_to_M64(VU_ACCx_ADDR, i);
SSE_MOVSS_XMM_to_M32(VU_ACCx_ADDR+8, t0reg);
@ -735,7 +735,7 @@ void _deleteACCtoXMMreg(int vu, int flush)
void _deleteGPRtoXMMreg(int reg, int flush)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_GPRREG && xmmregs[i].reg == reg ) {
@ -769,7 +769,7 @@ void _deleteGPRtoXMMreg(int reg, int flush)
void _deleteFPtoXMMreg(int reg, int flush)
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_FPREG && xmmregs[i].reg == reg ) {
switch(flush) {
case 0:
@ -795,7 +795,7 @@ void _deleteFPtoXMMreg(int reg, int flush)
void _freeXMMreg(int xmmreg)
{
assert( xmmreg < XMMREGS );
assert( xmmreg < iREGCNT_XMM );
if (!xmmregs[xmmreg].inuse) return;
@ -810,11 +810,11 @@ void _freeXMMreg(int xmmreg)
{
// don't destroy w
int t0reg;
for(t0reg = 0; t0reg < XMMREGS; ++t0reg ) {
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) {
if( !xmmregs[t0reg].inuse ) break;
}
if( t0reg < XMMREGS )
if( t0reg < iREGCNT_XMM )
{
SSE_MOVHLPS_XMM_to_XMM(t0reg, xmmreg);
SSE_MOVLPS_XMM_to_M64(VU_VFx_ADDR(xmmregs[xmmreg].reg), xmmreg);
@ -852,11 +852,11 @@ void _freeXMMreg(int xmmreg)
// don't destroy w
int t0reg;
for(t0reg = 0; t0reg < XMMREGS; ++t0reg ) {
for(t0reg = 0; t0reg < iREGCNT_XMM; ++t0reg ) {
if( !xmmregs[t0reg].inuse ) break;
}
if( t0reg < XMMREGS )
if( t0reg < iREGCNT_XMM )
{
SSE_MOVHLPS_XMM_to_XMM(t0reg, xmmreg);
SSE_MOVLPS_XMM_to_M64(VU_ACCx_ADDR, xmmreg);
@ -909,7 +909,7 @@ void _freeXMMreg(int xmmreg)
int _getNumXMMwrite()
{
int num = 0, i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if( xmmregs[i].inuse && (xmmregs[i].mode&MODE_WRITE) ) ++num;
}
@ -920,12 +920,12 @@ u8 _hasFreeXMMreg()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (!xmmregs[i].inuse) return 1;
}
// check for dead regs
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !EEINST_ISLIVEXMM(xmmregs[i].reg) ) {
@ -935,7 +935,7 @@ u8 _hasFreeXMMreg()
}
// check for dead regs
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].needed) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
if( !(g_pCurInstInfo->regs[xmmregs[i].reg]&EEINST_USED) ) {
@ -951,12 +951,12 @@ void _moveXMMreg(int xmmreg)
int i;
if( !xmmregs[xmmreg].inuse ) return;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse) continue;
break;
}
if( i == XMMREGS ) {
if( i == iREGCNT_XMM ) {
_freeXMMreg(xmmreg);
return;
}
@ -971,7 +971,7 @@ void _flushXMMregs()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
assert( xmmregs[i].type != XMMTYPE_TEMP );
@ -988,7 +988,7 @@ void _freeXMMregs()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (xmmregs[i].inuse == 0) continue;
assert( xmmregs[i].type != XMMTYPE_TEMP );

View File

@ -114,7 +114,7 @@ struct _x86regs {
u32 extra; // extra info assoc with the reg
};
extern _x86regs x86regs[X86REGS], s_saveX86regs[X86REGS];
extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
uptr _x86GetAddr(int type, int reg);
void _initX86regs();
@ -287,7 +287,7 @@ extern u32 g_recWriteback; // used for jumps (VUrec mess!)
extern u32 g_cpuRegHasLive1, g_cpuPrevRegHasLive1;
extern u32 g_cpuRegHasSignExt, g_cpuPrevRegHasSignExt;
extern _xmmregs xmmregs[XMMREGS], s_saveXMMregs[XMMREGS];
extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM];
extern u16 g_x86AllocCounter;
extern u16 g_xmmAllocCounter;
@ -361,7 +361,7 @@ void _recMove128MtoRmOffset(u32 offset, u32 from);
// a negative shift is for sign extension
extern int _signExtendGPRtoMMX(x86MMXRegType to, u32 gprreg, int shift);
extern _mmxregs mmxregs[MMXREGS], s_saveMMXregs[MMXREGS];
extern _mmxregs mmxregs[iREGCNT_MMX], s_saveMMXregs[iREGCNT_MMX];
extern u16 x86FpuState;
extern void iDumpRegisters(u32 startpc, u32 temp);

View File

@ -1640,7 +1640,7 @@ void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info)
}
if( regd == EEREC_ACC ) {
assert( EEREC_TEMP < XMMREGS );
assert( EEREC_TEMP < iREGCNT_XMM );
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); }

View File

@ -223,7 +223,7 @@ public:
u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only
u32 vuxy; // corresponding bit is set if reg's xy channels are used only
_xmmregs startregs[XMMREGS], endregs[XMMREGS];
_xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM];
int nStartx86, nEndx86; // indices into s_vecRegArray
int allocX86Regs;
@ -571,7 +571,7 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
if( (*itblock)->nStartx86 >= 0 ) {
pregs = &s_vecRegArray[(*itblock)->nStartx86];
fprintf(f, "STR: ");
for(i = 0; i < X86REGS; ++i) {
for(i = 0; i < iREGCNT_GPR; ++i) {
if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg);
else fprintf(f, "-1 ");
}
@ -581,7 +581,7 @@ void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
if( (*itblock)->nEndx86 >= 0 ) {
fprintf(f, "END: ");
pregs = &s_vecRegArray[(*itblock)->nEndx86];
for(i = 0; i < X86REGS; ++i) {
for(i = 0; i < iREGCNT_GPR; ++i) {
if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg);
else fprintf(f, "-1 ");
}
@ -1879,14 +1879,14 @@ void VuBaseBlock::AssignVFRegs()
if( type & BLOCKTYPE_ANALYZED ) {
// check if changed
for(i = 0; i < XMMREGS; ++i) {
for(i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse != startregs[i].inuse )
break;
if( xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type) )
break;
}
if( i == XMMREGS ) return; // nothing changed
if( i == iREGCNT_XMM ) return; // nothing changed
}
u8* oldX86 = x86Ptr;
@ -1904,7 +1904,7 @@ void VuBaseBlock::AssignVFRegs()
// redo the counters so that the proper regs are released
for(int j = 0; j < XMMREGS; ++j) {
for(int j = 0; j < iREGCNT_XMM; ++j) {
if( xmmregs[j].inuse ) {
if( xmmregs[j].type == XMMTYPE_VFREG ) {
int count = 0;
@ -2119,10 +2119,10 @@ void VuBaseBlock::AssignVIRegs(int parent)
// child
assert( allocX86Regs == -1 );
allocX86Regs = s_vecRegArray.size();
s_vecRegArray.resize(allocX86Regs+X86REGS);
s_vecRegArray.resize(allocX86Regs+iREGCNT_GPR);
_x86regs* pregs = &s_vecRegArray[allocX86Regs];
memset(pregs, 0, sizeof(_x86regs)*X86REGS);
memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR);
assert( parents.size() > 0 );
@ -2210,10 +2210,10 @@ static void SuperVUAssignRegs()
// assign the regs
int regid = s_vecRegArray.size();
s_vecRegArray.resize(regid+X86REGS);
s_vecRegArray.resize(regid+iREGCNT_GPR);
_x86regs* mergedx86 = &s_vecRegArray[regid];
memset(mergedx86, 0, sizeof(_x86regs)*X86REGS);
memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR);
if( !bfirst ) {
*(u32*)usedregs = *((u32*)usedregs+1) = *((u32*)usedregs+2) = *((u32*)usedregs+3) = 0;
@ -2221,7 +2221,7 @@ static void SuperVUAssignRegs()
FORIT(itblock2, s_markov.children) {
assert( (*itblock2)->allocX86Regs >= 0 );
_x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs];
for(int i = 0; i < X86REGS; ++i) {
for(int i = 0; i < iREGCNT_GPR; ++i) {
if( pregs[i].inuse && pregs[i].reg < 16) {
//assert( pregs[i].reg < 16);
usedregs[pregs[i].reg]++;
@ -2237,7 +2237,7 @@ static void SuperVUAssignRegs()
mergedx86[num].reg = i;
mergedx86[num].type = (s_vu?X86TYPE_VU1:0)|X86TYPE_VI;
mergedx86[num].mode = MODE_READ;
if( ++num >= X86REGS )
if( ++num >= iREGCNT_GPR )
break;
if( num == ESP )
++num;
@ -2559,7 +2559,7 @@ void svudispfntemp()
// frees all regs taking into account the livevars
void SuperVUFreeXMMregs(u32* livevars)
{
for(int i = 0; i < XMMREGS; ++i) {
for(int i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse ) {
// same reg
if( (xmmregs[i].mode & MODE_WRITE) ) {
@ -2772,7 +2772,7 @@ void VuBaseBlock::Recompile()
#ifdef SUPERVU_X86CACHING
if( nEndx86 >= 0 ) {
_x86regs* endx86 = &s_vecRegArray[nEndx86];
for(int i = 0; i < X86REGS; ++i) {
for(int i = 0; i < iREGCNT_GPR; ++i) {
if( endx86[i].inuse ) {
if( s_JumpX86 == i && x86regs[s_JumpX86].inuse ) {
@ -3239,7 +3239,7 @@ void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
#ifdef SUPERVU_X86CACHING
// redo the counters so that the proper regs are released
for(int j = 0; j < X86REGS; ++j) {
for(int j = 0; j < iREGCNT_GPR; ++j) {
if( x86regs[j].inuse && X86_ISVI(x86regs[j].type) ) {
int count = 0;
itinst2 = itinst;

View File

@ -78,16 +78,16 @@ int _getFreeX86reg(int mode)
int i, tempi;
u32 bestcount = 0x10000;
int maxreg = (mode&MODE_8BITREG)?4:X86REGS;
int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR;
for (i=0; i<X86REGS; i++) {
int reg = (g_x86checknext+i)%X86REGS;
for (i=0; i<iREGCNT_GPR; i++) {
int reg = (g_x86checknext+i)%iREGCNT_GPR;
if( reg == 0 || reg == ESP ) continue;
if( reg >= maxreg ) continue;
if( (mode&MODE_NOFRAME) && reg==EBP ) continue;
if (x86regs[reg].inuse == 0) {
g_x86checknext = (reg+1)%X86REGS;
g_x86checknext = (reg+1)%iREGCNT_GPR;
return reg;
}
}
@ -207,16 +207,16 @@ int _allocX86reg(int x86reg, int type, int reg, int mode)
// don't alloc EAX and ESP,EBP if MODE_NOFRAME
int oldmode = mode;
int noframe = mode&MODE_NOFRAME;
int maxreg = (mode&MODE_8BITREG)?4:X86REGS;
int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR;
mode &= ~(MODE_NOFRAME|MODE_8BITREG);
int readfromreg = -1;
if( type != X86TYPE_TEMP ) {
if( maxreg < X86REGS ) {
if( maxreg < iREGCNT_GPR ) {
// make sure reg isn't in the higher regs
for(i = maxreg; i < X86REGS; ++i) {
for(i = maxreg; i < iREGCNT_GPR; ++i) {
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) continue;
if( mode & MODE_READ ) {
@ -324,7 +324,7 @@ int _checkX86reg(int type, int reg, int mode)
{
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type) {
if( !(x86regs[i].mode & MODE_READ) && (mode&MODE_READ) ) {
@ -348,7 +348,7 @@ void _addNeededX86reg(int type, int reg)
{
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (!x86regs[i].inuse || x86regs[i].reg != reg || x86regs[i].type != type ) continue;
x86regs[i].counter = g_x86AllocCounter++;
@ -359,7 +359,7 @@ void _addNeededX86reg(int type, int reg)
void _clearNeededX86regs() {
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (x86regs[i].needed ) {
if( x86regs[i].inuse && (x86regs[i].mode&MODE_WRITE) )
x86regs[i].mode |= MODE_READ;
@ -372,7 +372,7 @@ void _deleteX86reg(int type, int reg, int flush)
{
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type) {
switch(flush) {
case 0:
@ -401,7 +401,7 @@ void _deleteX86reg(int type, int reg, int flush)
void _freeX86reg(int x86reg)
{
assert( x86reg >= 0 && x86reg < X86REGS );
assert( x86reg >= 0 && x86reg < iREGCNT_GPR );
if( x86regs[x86reg].inuse && (x86regs[x86reg].mode&MODE_WRITE) ) {
x86regs[x86reg].mode &= ~MODE_WRITE;
@ -419,7 +419,7 @@ void _freeX86reg(int x86reg)
void _freeX86regs() {
int i;
for (i=0; i<X86REGS; i++) {
for (i=0; i<iREGCNT_GPR; i++) {
if (!x86regs[i].inuse) continue;
_freeX86reg(i);
@ -459,16 +459,16 @@ int _getFreeMMXreg()
int tempi = -1;
u32 bestcount = 0x10000;
for (i=0; i<MMXREGS; i++) {
if (mmxregs[(s_mmxchecknext+i)%MMXREGS].inuse == 0) {
int ret = (s_mmxchecknext+i)%MMXREGS;
s_mmxchecknext = (s_mmxchecknext+i+1)%MMXREGS;
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[(s_mmxchecknext+i)%iREGCNT_MMX].inuse == 0) {
int ret = (s_mmxchecknext+i)%iREGCNT_MMX;
s_mmxchecknext = (s_mmxchecknext+i+1)%iREGCNT_MMX;
return ret;
}
}
// check for dead regs
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { // mmxregs[i] is unsigned, and MMX_GPR == 0, so the first part is always true.
if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR] & (EEINST_LIVE0|EEINST_LIVE1)) ) {
@ -483,7 +483,7 @@ int _getFreeMMXreg()
}
// check for future xmm usage
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) {
if( !(g_pCurInstInfo->regs[mmxregs[i].reg] & EEINST_MMX) ) {
@ -493,7 +493,7 @@ int _getFreeMMXreg()
}
}
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg != MMX_TEMP) {
@ -523,7 +523,7 @@ int _allocMMXreg(int mmxreg, int reg, int mode)
int i;
if( reg != MMX_TEMP ) {
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0 || mmxregs[i].reg != reg ) continue;
if( MMX_ISGPR(reg)) {
@ -602,7 +602,7 @@ int _allocMMXreg(int mmxreg, int reg, int mode)
int _checkMMXreg(int reg, int mode)
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse && mmxregs[i].reg == reg ) {
if( !(mmxregs[i].mode & MODE_READ) && (mode&MODE_READ) ) {
@ -635,7 +635,7 @@ void _addNeededMMXreg(int reg)
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0) continue;
if (mmxregs[i].reg != reg) continue;
@ -648,7 +648,7 @@ void _clearNeededMMXregs()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if( mmxregs[i].needed ) {
// setup read to any just written regs
if( mmxregs[i].inuse && (mmxregs[i].mode&MODE_WRITE) )
@ -661,7 +661,7 @@ void _clearNeededMMXregs()
void _deleteMMXreg(int reg, int flush)
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse && mmxregs[i].reg == reg ) {
@ -696,7 +696,7 @@ void _deleteMMXreg(int reg, int flush)
int _getNumMMXwrite()
{
int num = 0, i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if( mmxregs[i].inuse && (mmxregs[i].mode&MODE_WRITE) ) ++num;
}
@ -706,12 +706,12 @@ int _getNumMMXwrite()
u8 _hasFreeMMXreg()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (!mmxregs[i].inuse) return 1;
}
// check for dead regs
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) {
if( !EEINST_ISLIVE64(mmxregs[i].reg-MMX_GPR) ) {
@ -721,7 +721,7 @@ u8 _hasFreeMMXreg()
}
// check for dead regs
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].needed) continue;
if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) {
if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) {
@ -735,7 +735,7 @@ u8 _hasFreeMMXreg()
void _freeMMXreg(int mmxreg)
{
assert( mmxreg < MMXREGS );
assert( mmxreg < iREGCNT_MMX );
if (!mmxregs[mmxreg].inuse) return;
if (mmxregs[mmxreg].mode & MODE_WRITE ) {
@ -762,12 +762,12 @@ void _moveMMXreg(int mmxreg)
int i;
if( !mmxregs[mmxreg].inuse ) return;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse) continue;
break;
}
if( i == MMXREGS ) {
if( i == iREGCNT_MMX ) {
_freeMMXreg(mmxreg);
return;
}
@ -783,7 +783,7 @@ void _flushMMXregs()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0) continue;
if( mmxregs[i].mode & MODE_WRITE ) {
@ -807,7 +807,7 @@ void _flushMMXregs()
void _freeMMXregs()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (mmxregs[i].inuse == 0) continue;
assert( mmxregs[i].reg != MMX_TEMP );

View File

@ -301,7 +301,7 @@ void _eeFlushAllUnused()
}
//TODO when used info is done for FPU and VU0
for(i = 0; i < XMMREGS; ++i) {
for(i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG )
_freeXMMreg(i);
}
@ -394,7 +394,7 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
int _flushXMMunused()
{
int i;
for (i=0; i<XMMREGS; i++) {
for (i=0; i<iREGCNT_XMM; i++) {
if (!xmmregs[i].inuse || xmmregs[i].needed || !(xmmregs[i].mode&MODE_WRITE) ) continue;
if (xmmregs[i].type == XMMTYPE_GPRREG ) {
@ -413,7 +413,7 @@ int _flushXMMunused()
int _flushMMXunused()
{
int i;
for (i=0; i<MMXREGS; i++) {
for (i=0; i<iREGCNT_MMX; i++) {
if (!mmxregs[i].inuse || mmxregs[i].needed || !(mmxregs[i].mode&MODE_WRITE) ) continue;
if( MMX_ISGPR(mmxregs[i].reg) ) {
@ -1217,7 +1217,7 @@ void recompileNextInstruction(int delayslot)
g_pCurInstInfo++;
for(i = 0; i < MMXREGS; ++i) {
for(i = 0; i < iREGCNT_MMX; ++i) {
if( mmxregs[i].inuse ) {
assert( MMX_ISGPR(mmxregs[i].reg) );
count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR);
@ -1226,7 +1226,7 @@ void recompileNextInstruction(int delayslot)
}
}
for(i = 0; i < XMMREGS; ++i) {
for(i = 0; i < iREGCNT_XMM; ++i) {
if( xmmregs[i].inuse ) {
count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, xmmregs[i].type, xmmregs[i].reg);
if( count > 0 ) xmmregs[i].counter = 1000-count;
@ -1587,7 +1587,7 @@ StartRecomp:
// see how many stores there are
u32 j;
// use xmmregs since only supporting lwc1,lq,swc1,sq
for(j = i+8; j < s_nEndBlock && j < i+4*XMMREGS; j += 4 ) {
for(j = i+8; j < s_nEndBlock && j < i+4*iREGCNT_XMM; j += 4 ) {
u32 nncode = *(u32*)PSM(j);
if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) ||
_eeLoadWritesRs(nncode))
@ -1596,7 +1596,7 @@ StartRecomp:
if( j > i+8 ) {
u32 num = (j-i)>>2; // number of stores that can coissue
assert( num <= XMMREGS );
assert( num <= iREGCNT_XMM );
g_pCurInstInfo[0].numpeeps = num-1;
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;

View File

@ -62,7 +62,7 @@ __threadlocal u8 *x86Ptr;
__threadlocal u8 *j8Ptr[32];
__threadlocal u32 *j32Ptr[32];
__threadlocal XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT };
__threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT };
namespace x86Emitter {
@ -73,22 +73,22 @@ const x86IndexerTypeExplicit<1> ptr8;
// ------------------------------------------------------------------------
template< int OperandSize > const x86Register<OperandSize> x86Register<OperandSize>::Empty;
template< int OperandSize > const iRegister<OperandSize> iRegister<OperandSize>::Empty;
const x86IndexReg x86IndexReg::Empty;
const x86Register32
const iRegister32
eax( 0 ), ebx( 3 ),
ecx( 1 ), edx( 2 ),
esi( 6 ), edi( 7 ),
ebp( 5 ), esp( 4 );
const x86Register16
const iRegister16
ax( 0 ), bx( 3 ),
cx( 1 ), dx( 2 ),
si( 6 ), di( 7 ),
bp( 5 ), sp( 4 );
const x86Register8
const iRegister8
al( 0 ), cl( 1 ),
dl( 2 ), bl( 3 ),
ah( 4 ), ch( 5 ),
@ -96,28 +96,8 @@ const x86Register8
namespace Internal
{
const Group1ImplAll<G1Type_ADD> iADD;
const Group1ImplAll<G1Type_OR> iOR;
const Group1ImplAll<G1Type_ADC> iADC;
const Group1ImplAll<G1Type_SBB> iSBB;
const Group1ImplAll<G1Type_AND> iAND;
const Group1ImplAll<G1Type_SUB> iSUB;
const Group1ImplAll<G1Type_XOR> iXOR;
const Group1ImplAll<G1Type_CMP> iCMP;
const Group2ImplAll<G2Type_ROL> iROL;
const Group2ImplAll<G2Type_ROR> iROR;
const Group2ImplAll<G2Type_RCL> iRCL;
const Group2ImplAll<G2Type_RCR> iRCR;
const Group2ImplAll<G2Type_SHL> iSHL;
const Group2ImplAll<G2Type_SHR> iSHR;
const Group2ImplAll<G2Type_SAR> iSAR;
const MovExtendImplAll<true> iMOVSX;
const MovExtendImplAll<false> iMOVZX;
// Performance note: VC++ wants to use byte/word register form for the following
// ModRM/SibSB constructors if we use iWrite<u8>, and furthermore unrolls the
// ModRM/SibSB constructors when we use iWrite<u8>, and furthermore unrolls the
// the shift using a series of ADDs for the following results:
// add cl,cl
// add cl,cl
@ -130,21 +110,38 @@ namespace Internal
// register aliases and false dependencies. (although may have been ideal for early-
// brand P4s with a broken barrel shifter?). The workaround is to do our own manual
// x86Ptr access and update using a u32 instead of u8. Thanks to little endianness,
// the same end result is achieved and no false dependencies are generated.
// the same end result is achieved and no false dependencies are generated. The draw-
// back is that it clobbers 3 bytes past the end of the write, which could cause a
// headache for someone who himself is doing some kind of headache-inducing amount of
// recompiler SMC. So we don't do a work-around, and just hope for the compiler to
// stop sucking someday instead. :)
//
// (btw, I know this isn't a critical performance item by any means, but it's
// annoying simply because it *should* be an easy thing to optimize)
__forceinline void ModRM( uint mod, uint reg, uint rm )
{
*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
x86Ptr++;
iWrite<u8>( (mod << 6) | (reg << 3) | rm );
//*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
//x86Ptr++;
}
__forceinline void SibSB( u32 ss, u32 index, u32 base )
{
*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
x86Ptr++;
iWrite<u8>( (ss << 6) | (index << 3) | base );
//*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
//x86Ptr++;
}
__forceinline void iWriteDisp( int regfield, s32 displacement )
{
ModRM( 0, regfield, ModRm_UseDisp32 );
iWrite<s32>( displacement );
}
__forceinline void iWriteDisp( int regfield, const void* address )
{
iWriteDisp( regfield, (s32)address );
}
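// Usage sketch: iWriteDisp encodes the mod=00, rm=101 "[disp32]" addressing
// form. For "mov ecx, [g_var]" ('g_var' being a hypothetical u32 global), the
// generic path amounts to:
//
//   iWrite<u8>( 0x8b );            // opcode: mov r32, r/m32
//   iWriteDisp( ecx.Id, &g_var );  // ModRM 0x0D + 32-bit absolute address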
// ------------------------------------------------------------------------
@ -172,7 +169,7 @@ namespace Internal
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
__forceinline void EmitSibMagic( uint regfield, const ModSibBase& info )
void EmitSibMagic( uint regfield, const ModSibBase& info )
{
jASSUME( regfield < 8 );
@ -188,8 +185,7 @@ namespace Internal
if( info.Index.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseDisp32 );
iWrite<s32>( info.Displacement );
iWriteDisp( regfield, info.Displacement );
return;
}
else
@ -227,14 +223,63 @@ namespace Internal
if( displacement_size != 0 )
{
*(s32*)x86Ptr = info.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
if( displacement_size == 1 )
iWrite<s8>( info.Displacement );
else
iWrite<s32>( info.Displacement );
}
}
}
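// Displacement sizing in practice (register/offset values illustrative):
//   [esi+0x10]   -> fits s8, one displacement byte follows the ModRM/SIB
//   [esi+0x1000] -> needs s32, four displacement bytes follow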
using namespace Internal;
const Group1ImplAll<G1Type_ADD> iADD;
const Group1ImplAll<G1Type_OR> iOR;
const Group1ImplAll<G1Type_ADC> iADC;
const Group1ImplAll<G1Type_SBB> iSBB;
const Group1ImplAll<G1Type_AND> iAND;
const Group1ImplAll<G1Type_SUB> iSUB;
const Group1ImplAll<G1Type_XOR> iXOR;
const Group1ImplAll<G1Type_CMP> iCMP;
const Group2ImplAll<G2Type_ROL> iROL;
const Group2ImplAll<G2Type_ROR> iROR;
const Group2ImplAll<G2Type_RCL> iRCL;
const Group2ImplAll<G2Type_RCR> iRCR;
const Group2ImplAll<G2Type_SHL> iSHL;
const Group2ImplAll<G2Type_SHR> iSHR;
const Group2ImplAll<G2Type_SAR> iSAR;
const MovExtendImplAll<true> iMOVSX;
const MovExtendImplAll<false> iMOVZX;
const CMovImplGeneric iCMOV;
const CMovImplAll<Jcc_Above> iCMOVA;
const CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
const CMovImplAll<Jcc_Below> iCMOVB;
const CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
const CMovImplAll<Jcc_Greater> iCMOVG;
const CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
const CMovImplAll<Jcc_Less> iCMOVL;
const CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
const CMovImplAll<Jcc_Zero> iCMOVZ;
const CMovImplAll<Jcc_Equal> iCMOVE;
const CMovImplAll<Jcc_NotZero> iCMOVNZ;
const CMovImplAll<Jcc_NotEqual> iCMOVNE;
const CMovImplAll<Jcc_Overflow> iCMOVO;
const CMovImplAll<Jcc_NotOverflow> iCMOVNO;
const CMovImplAll<Jcc_Carry> iCMOVC;
const CMovImplAll<Jcc_NotCarry> iCMOVNC;
const CMovImplAll<Jcc_Signed> iCMOVS;
const CMovImplAll<Jcc_Unsigned> iCMOVNS;
const CMovImplAll<Jcc_ParityEven> iCMOVPE;
const CMovImplAll<Jcc_ParityOdd> iCMOVPO;
// ------------------------------------------------------------------------
// Assigns the current emitter buffer target address.
// This is provided instead of using x86Ptr directly, since we may in the future find
@ -390,18 +435,20 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
if( displacement_size != 0 )
{
*(s32*)x86Ptr = src.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
if( displacement_size == 1 )
iWrite<s8>( src.Displacement );
else
iWrite<s32>( src.Displacement );
}
}
__emitinline void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags )
__emitinline void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags )
{
EmitLeaMagic( to, src, preserve_flags );
}
__emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags )
__emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags )
{
write8( 0x66 );
EmitLeaMagic( to, src, preserve_flags );
@ -410,7 +457,7 @@ __emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_fl
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instruction Implementation
template< typename ImmType, typename SibMagicType >
template< typename ImmType >
class MovImpl
{
public:
@ -422,7 +469,7 @@ protected:
public:
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
static __forceinline void Emit( const iRegister<OperandSize>& to, const iRegister<OperandSize>& from )
{
if( to == from ) return; // ignore redundant MOVs.
@ -432,7 +479,7 @@ public:
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const ModSibBase& dest, const x86Register<OperandSize>& from )
static __forceinline void Emit( const ModSibBase& dest, const iRegister<OperandSize>& from )
{
prefix16();
@ -447,12 +494,12 @@ public:
else
{
iWrite<u8>( Is8BitOperand() ? 0x88 : 0x89 );
SibMagicType::Emit( from.Id, dest );
EmitSibMagic( from.Id, dest );
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& src )
static __forceinline void Emit( const iRegister<OperandSize>& to, const ModSibBase& src )
{
prefix16();
@ -467,12 +514,50 @@ public:
else
{
iWrite<u8>( Is8BitOperand() ? 0x8a : 0x8b );
SibMagicType::Emit( to.Id, src );
EmitSibMagic( to.Id, src );
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
static __forceinline void Emit( void* dest, const iRegister<OperandSize>& from )
{
prefix16();
// mov eax has a special form when writing directly to a DISP32 address
if( from.IsAccumulator() )
{
iWrite<u8>( Is8BitOperand() ? 0xa2 : 0xa3 );
iWrite<s32>( (s32)dest );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0x88 : 0x89 );
iWriteDisp( from.Id, dest );
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const iRegister<OperandSize>& to, const void* src )
{
prefix16();
// mov eax has a special form when reading directly from a DISP32 address
if( to.IsAccumulator() )
{
iWrite<u8>( Is8BitOperand() ? 0xa0 : 0xa1 );
iWrite<s32>( (s32)src );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0x8a : 0x8b );
iWriteDisp( to.Id, src );
}
}
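// Byte-level sketch of the two paths above ('g_var' is a hypothetical u32
// global, <a32> its 32-bit absolute address):
//
//   Emit( eax, (void*)&g_var );   // A1 <a32>     -- accumulator short form
//   Emit( ecx, (void*)&g_var );   // 8B 0D <a32>  -- generic ModRM disp32 form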
// ------------------------------------------------------------------------
static __forceinline void Emit( const iRegister<OperandSize>& to, ImmType imm )
{
// Note: MOV does not have (reg16/32,imm8) forms.
@ -486,20 +571,16 @@ public:
{
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xc6 : 0xc7 );
SibMagicType::Emit( 0, dest );
EmitSibMagic( 0, dest );
iWrite<ImmType>( imm );
}
};
namespace Internal
{
typedef MovImpl<u32,SibMagic> MOV32;
typedef MovImpl<u16,SibMagic> MOV16;
typedef MovImpl<u8,SibMagic> MOV8;
typedef MovImpl<u32,SibMagicInline> MOV32i;
typedef MovImpl<u16,SibMagicInline> MOV16i;
typedef MovImpl<u8,SibMagicInline> MOV8i;
typedef MovImpl<u32> MOV32;
typedef MovImpl<u16> MOV16;
typedef MovImpl<u8> MOV8;
}
// Inlining Notes:
@ -512,96 +593,72 @@ namespace Internal
// TODO : Turn this into a macro after it's been debugged and accuracy-approved! :D
// ---------- 32 Bit Interface -----------
__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__forceinline void iMOV( const iRegister32& to, const iRegister32& from ) { MOV32::Emit( to, from ); }
__forceinline void iMOV( const iRegister32& to, const void* src ) { MOV32::Emit( to, ptr32[src] ); }
__forceinline void iMOV( void* dest, const iRegister32& from ) { MOV32::Emit( ptr32[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const iRegister32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void iMOV( const iRegister32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
void iMOV( const x86Register32& to, u32 imm, bool preserve_flags )
void iMOV( const iRegister32& to, u32 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
iXOR( to, to );
else
MOV32i::Emit( to, imm );
MOV32::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__forceinline void iMOV( const iRegister16& to, const iRegister16& from ) { MOV16::Emit( to, from ); }
__forceinline void iMOV( const iRegister16& to, const void* src ) { MOV16::Emit( to, ptr16[src] ); }
__forceinline void iMOV( void* dest, const iRegister16& from ) { MOV16::Emit( ptr16[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const iRegister16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void iMOV( const iRegister16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
void iMOV( const x86Register16& to, u16 imm, bool preserve_flags )
void iMOV( const iRegister16& to, u16 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
iXOR( to, to );
else
MOV16i::Emit( to, imm );
MOV16::Emit( to, imm );
}
// ---------- 8 Bit Interface -----------
__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__forceinline void iMOV( const iRegister8& to, const iRegister8& from ) { MOV8::Emit( to, from ); }
__forceinline void iMOV( const iRegister8& to, const void* src ) { MOV8::Emit( to, ptr8[src] ); }
__forceinline void iMOV( void* dest, const iRegister8& from ) { MOV8::Emit( ptr8[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const iRegister8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void iMOV( const iRegister8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
void iMOV( const x86Register8& to, u8 imm, bool preserve_flags )
void iMOV( const iRegister8& to, u8 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
iXOR( to, to );
else
MOV8i::Emit( to, imm );
MOV8::Emit( to, imm );
}
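// Usage sketch ('g_var' is a hypothetical u32 global):
//
//   iMOV( eax, &g_var );      // mov eax, [g_var]
//   iMOV( &g_var, eax );      // mov [g_var], eax
//   iMOV( edx, 0, false );    // emitted as xor edx, edx (flags clobbered)
//   iMOV( edx, 0, true );     // mov edx, 0 (preserve_flags: no xor shortcut)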
//////////////////////////////////////////////////////////////////////////////////////////
// Miscellaneous Section!
// Various Instructions with no parameter and no special encoding logic.
//
__forceinline void RET() { write8( 0xC3 ); }
__forceinline void CBW() { write16( 0x9866 ); }
__forceinline void CWD() { write8( 0x98 ); }
__forceinline void CDQ() { write8( 0x99 ); }
__forceinline void CWDE() { write8( 0x98 ); }
__forceinline void LAHF() { write8( 0x9f ); }
__forceinline void SAHF() { write8( 0x9e ); }
//////////////////////////////////////////////////////////////////////////////////////////
// Push / Pop Emitters
//
// Note: pushad/popad implementations are intentionally left out. The instructions are
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
__forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); }
__emitinline void POP( const ModSibBase& from )
__emitinline void iPOP( const ModSibBase& from )
{
iWrite<u8>( 0x8f );
Internal::EmitSibMagic( 0, from );
}
__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
__forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); }
__emitinline void PUSH( const ModSibBase& from )
__emitinline void iPUSH( const ModSibBase& from )
{
iWrite<u8>( 0xff );
Internal::EmitSibMagic( 6, from );
}
// pushes the EFLAGS register onto the stack
__forceinline void PUSHFD() { write8( 0x9C ); }
// pops the EFLAGS register from the stack
__forceinline void POPFD() { write8( 0x9D ); }
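// Usage sketch (byte comments illustrative):
//
//   PUSH( eax );      // 50
//   PUSH( 0x100 );    // 68 00 01 00 00  (always the imm32 form)
//   POP( eax );       // 58
//   PUSHFD();         // 9C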
}

View File

@ -42,15 +42,6 @@
// ix86_inlines.inl file when it is known that inlining of ModSib functions is
// wanted).
//
//
// Important when Using the New Emitter:
// Make sure there is *no* data in use or of importance past the end of the
// current x86Ptr. Ie, don't do fancy x86Ptr rewind tricks of your own. The
// emitter uses optimized writes which will clobber data past the end of the
// instruction it's emitting, so even if you know for sure the instruction you
// are writing is 5 bytes, the emitter will likely emit 9 bytes and then re-
// wind the x86Ptr to the end of the instruction.
//
#pragma once

View File

@ -0,0 +1,179 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
// Instructions implemented in this header are as follows -->>
enum G1Type
{
G1Type_ADD=0,
G1Type_OR,
G1Type_ADC,
G1Type_SBB,
G1Type_AND,
G1Type_SUB,
G1Type_XOR,
G1Type_CMP
};
// -------------------------------------------------------------------
template< typename ImmType, G1Type InstType >
class Group1Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group1Impl() {} // because GCC doesn't like static classes
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const iRegister<OperandSize>& to, const iRegister<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const ModSibBase& sibdest, const iRegister<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
EmitSibMagic( from.Id, sibdest );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, const ModSibBase& sibsrc )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
EmitSibMagic( to.Id, sibsrc );
}
static __emitinline void Emit( void* dest, const iRegister<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
iWriteDisp( from.Id, dest );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, const void* src )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
iWriteDisp( to.Id, src );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, ImmType imm )
{
prefix16();
if( !Is8BitOperand() && is_s8( imm ) )
{
iWrite<u8>( 0x83 );
ModRM( 3, InstType, to.Id );
iWrite<s8>( imm );
}
else
{
if( to.IsAccumulator() )
iWrite<u8>( (Is8BitOperand() ? 4 : 5) | (InstType<<3) );
else
{
iWrite<u8>( Is8BitOperand() ? 0x80 : 0x81 );
ModRM( 3, InstType, to.Id );
}
iWrite<ImmType>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, ImmType imm )
{
if( Is8BitOperand() )
{
iWrite<u8>( 0x80 );
EmitSibMagic( InstType, sibdest );
iWrite<ImmType>( imm );
}
else
{
prefix16();
iWrite<u8>( is_s8( imm ) ? 0x83 : 0x81 );
EmitSibMagic( InstType, sibdest );
if( is_s8( imm ) )
iWrite<s8>( imm );
else
iWrite<ImmType>( imm );
}
}
};
// -------------------------------------------------------------------
//
template< G1Type InstType >
class Group1ImplAll
{
protected:
typedef Group1Impl<u32, InstType> m_32;
typedef Group1Impl<u16, InstType> m_16;
typedef Group1Impl<u8, InstType> m_8;
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); }
__forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); }
__forceinline void operator()( void* dest, const iRegister32& from ) const { m_32::Emit( dest, from ); }
__noinline void operator()( const ModSibBase& sibdest, const iRegister32& from ) const { m_32::Emit( sibdest, from ); }
__noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const{ m_32::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const iRegister32& to, u32 imm, bool needs_flags=false ) const
{
//if( needs_flags || (imm != 0) || !_optimize_imm0() )
m_32::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); }
__forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); }
__forceinline void operator()( void* dest, const iRegister16& from ) const { m_16::Emit( dest, from ); }
__noinline void operator()( const ModSibBase& sibdest, const iRegister16& from ) const { m_16::Emit( sibdest, from ); }
__noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const{ m_16::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const iRegister16& to, u16 imm, bool needs_flags=false ) const { m_16::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); }
__forceinline void operator()( const iRegister8& to, const void* src ) const { m_8::Emit( to, src ); }
__forceinline void operator()( void* dest, const iRegister8& from ) const { m_8::Emit( dest, from ); }
__noinline void operator()( const ModSibBase& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); }
__noinline void operator()( const iRegister8& to, const ModSibBase& sibsrc ) const{ m_8::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const iRegister8& to, u8 imm, bool needs_flags=false ) const { m_8::Emit( to, imm ); }
Group1ImplAll() {} // Why does GCC need these?
};
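// Usage sketch of the Group1 front-ends (byte comments illustrative):
//
//   iADD( eax, ecx );      // 01 C8
//   iADD( eax, 1 );        // 83 C0 01        -- imm fits s8: short form
//   iCMP( eax, 0x1000 );   // 3D 00 10 00 00  -- accumulator special form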

View File

@ -0,0 +1,151 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
// Instructions implemented in this header are as follows -->>
enum G2Type
{
G2Type_ROL=0,
G2Type_ROR,
G2Type_RCL,
G2Type_RCR,
G2Type_SHL,
G2Type_SHR,
G2Type_Unused,
G2Type_SAR
};
// -------------------------------------------------------------------
// Group 2 (shift) instructions have no Sib/ModRM forms.
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero.
// This is a safe optimization since any zero-value shift does not affect any flags.
//
template< typename ImmType, G2Type InstType >
class Group2Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group2Impl() {} // For the love of GCC.
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const iRegister<OperandSize>& to, const iRegister8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
ModRM( 3, InstType, to.Id );
}
static __emitinline void Emit( const iRegister<OperandSize>& to, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
ModRM( 3, InstType, to.Id );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
ModRM( 3, InstType, to.Id );
iWrite<u8>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, const iRegister8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
EmitSibMagic( from.Id, sibdest );
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
EmitSibMagic( InstType, sibdest );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
EmitSibMagic( InstType, sibdest );
iWrite<u8>( imm );
}
}
};
// -------------------------------------------------------------------
//
template< G2Type InstType >
class Group2ImplAll
{
protected:
typedef Group2Impl<u32, InstType> m_32;
typedef Group2Impl<u16, InstType> m_16;
typedef Group2Impl<u8, InstType> m_8;
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
// cretion of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_32::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, const iRegister8& from ) const { m_32::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const iRegister32& to, u8 imm ) const { m_32::Emit( to, imm ); }
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_16::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, const iRegister8& from ) const { m_16::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const iRegister16& to, u8 imm ) const { m_16::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); }
Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds.
};
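// Usage sketch (byte comments illustrative):
//
//   iSHL( eax, 1 );    // D1 E0  -- special shift-by-one encoding
//   iSHL( eax, 4 );    // C1 E0 04
//   iSHR( eax, cl );   // D3 E8  -- cl is the only valid shift register
//   iSAR( eax, 0 );    // emits nothing; zero shifts are elided (flags unaffected)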

View File

@ -0,0 +1,157 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Header: ix86_impl_movs.h -- covers cmov and movsx/movzx.
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
//////////////////////////////////////////////////////////////////////////////////////////
// CMOV !! [in all of its disappointing lack of glory]
//
template< int OperandSize >
class CMovImpl
{
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( JccComparisonType cc )
{
jASSUME( cc >= 0 && cc <= 0x0f );
prefix16();
write8( 0x0f );
write8( 0x40 | cc );
}
public:
CMovImpl() {}
static __emitinline void Emit( JccComparisonType cc, const iRegister<OperandSize>& to, const iRegister<OperandSize>& from )
{
emit_base( cc );
ModRM( 3, to.Id, from.Id );
}
static __emitinline void Emit( JccComparisonType cc, const iRegister<OperandSize>& to, const void* src )
{
emit_base( cc );
iWriteDisp( to.Id, src );
}
static __emitinline void Emit( JccComparisonType cc, const iRegister<OperandSize>& to, const ModSibBase& sibsrc )
{
emit_base( cc );
EmitSibMagic( to.Id, sibsrc );
}
};
// ------------------------------------------------------------------------
class CMovImplGeneric
{
protected:
typedef CMovImpl<4> m_32;
typedef CMovImpl<2> m_16;
public:
__forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); }
__forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); }
__noinline void operator()( JccComparisonType ccType, const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); }
__forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); }
__forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); }
__noinline void operator()( JccComparisonType ccType, const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); }
CMovImplGeneric() {} // don't ask.
};
// ------------------------------------------------------------------------
template< JccComparisonType ccType >
class CMovImplAll
{
protected:
typedef CMovImpl<4> m_32;
typedef CMovImpl<2> m_16;
public:
__forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); }
__forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); }
__noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); }
__forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); }
__forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); }
__noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); }
CMovImplAll() {} // don't ask.
};
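// Usage sketch, assuming JccComparisonType values match the hardware condition
// codes (implied by the 0x40|cc encoding above):
//
//   iCMOVZ( eax, ecx );             // 0F 44 C1  -- cmovz eax, ecx
//   iCMOV( Jcc_Below, eax, ecx );   // 0F 42 C1  -- condition picked at runtime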
//////////////////////////////////////////////////////////////////////////////////////////
// Mov with sign/zero extension implementations (movsx / movzx)
//
template< int DestOperandSize, int SrcOperandSize >
class MovExtendImpl
{
protected:
static bool Is8BitOperand() { return SrcOperandSize == 1; }
static void prefix16() { if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( bool SignExtend )
{
prefix16();
iWrite<u8>( 0x0f );
iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) );
}
public:
MovExtendImpl() {} // For the love of GCC.
static __emitinline void Emit( const iRegister<DestOperandSize>& to, const iRegister<SrcOperandSize>& from, bool SignExtend )
{
emit_base( SignExtend );
ModRM( 3, to.Id, from.Id ); // reg field is the destination for 0F B6/B7/BE/BF /r
}
static __emitinline void Emit( const iRegister<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend )
{
emit_base( SignExtend );
EmitSibMagic( to.Id, sibsrc );
}
};
// ------------------------------------------------------------------------
template< bool SignExtend >
class MovExtendImplAll
{
protected:
typedef MovExtendImpl<4, 2> m_16to32;
typedef MovExtendImpl<4, 1> m_8to32;
typedef MovExtendImpl<2, 1> m_8to16;
public:
__forceinline void operator()( const iRegister32& to, const iRegister16& from ) const { m_16to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const iRegister32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_8to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const iRegister32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_8to16::Emit( to, from, SignExtend ); }
__noinline void operator()( const iRegister16& to, const ModSibStrict<1>& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); }
MovExtendImplAll() {} // don't ask.
};
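// Usage sketch (illustrative only): the opcode byte is built above as
// 0xb6 | wide-bit | sign-bit, so these frontends should resolve to the
// expected 0F BE /r (movsx r32,r/m8) and 0F B7 /r (movzx r32,r/m16) forms:
//
//    iMOVSX( eax, bl );           // movsx eax, bl
//    iMOVZX( eax, ptr16[edx] );   // movzx eax, word ptr [edx]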

View File

@ -53,29 +53,29 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
// x86Register Method Implementations
//
__forceinline x86AddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const
__forceinline iAddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const
{
return x86AddressInfo( *this, right );
return iAddressInfo( *this, right );
}
__forceinline x86AddressInfo x86IndexReg::operator+( const x86AddressInfo& right ) const
__forceinline iAddressInfo x86IndexReg::operator+( const iAddressInfo& right ) const
{
return right + *this;
}
__forceinline x86AddressInfo x86IndexReg::operator+( s32 right ) const
__forceinline iAddressInfo x86IndexReg::operator+( s32 right ) const
{
return x86AddressInfo( *this, right );
return iAddressInfo( *this, right );
}
__forceinline x86AddressInfo x86IndexReg::operator*( u32 right ) const
__forceinline iAddressInfo x86IndexReg::operator*( u32 right ) const
{
return x86AddressInfo( Empty, *this, right );
return iAddressInfo( Empty, *this, right );
}
__forceinline x86AddressInfo x86IndexReg::operator<<( u32 shift ) const
__forceinline iAddressInfo x86IndexReg::operator<<( u32 shift ) const
{
return x86AddressInfo( Empty, *this, 1<<shift );
return iAddressInfo( Empty, *this, 1<<shift );
}
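// Illustrative compositions (these operators are what give the ptr[...] forms
// used throughout the legacy wrappers their meaning):
//
//    x86IndexReg(ebx) + 8;                    // base + displacement
//    x86IndexReg(ebx) + x86IndexReg(ecx)*4;   // base + scaled index
//    x86IndexReg(ebx)<<2;                     // scale via shift: factor 4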
//////////////////////////////////////////////////////////////////////////////////////////
@ -83,7 +83,7 @@ namespace x86Emitter
//
// ------------------------------------------------------------------------
__forceinline ModSibBase::ModSibBase( const x86AddressInfo& src ) :
__forceinline ModSibBase::ModSibBase( const iAddressInfo& src ) :
Base( src.Base ),
Index( src.Index ),
Scale( src.Factor ),
@ -181,9 +181,9 @@ namespace x86Emitter
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86AddressInfo Method Implementations
// iAddressInfo Method Implementations
//
__forceinline x86AddressInfo& x86AddressInfo::Add( const x86IndexReg& src )
__forceinline iAddressInfo& iAddressInfo::Add( const x86IndexReg& src )
{
if( src == Index )
{
@ -214,7 +214,7 @@ namespace x86Emitter
}
// ------------------------------------------------------------------------
__forceinline x86AddressInfo& x86AddressInfo::Add( const x86AddressInfo& src )
__forceinline iAddressInfo& iAddressInfo::Add( const iAddressInfo& src )
{
Add( src.Base );
Add( src.Displacement );

View File

@ -43,76 +43,80 @@ namespace x86Emitter
// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
// instead.
extern void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags=false );
extern void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags=false );
extern void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags=false );
extern void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags=false );
// ----- Push / Pop Instructions -----
// Note: pushad/popad implementations are intentionally left out. The instructions are
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
extern void POP( x86Register32 from );
extern void POP( const ModSibBase& from );
extern void iPOP( const ModSibBase& from );
extern void iPUSH( const ModSibBase& from );
extern void PUSH( u32 imm );
extern void PUSH( x86Register32 from );
extern void PUSH( const ModSibBase& from );
static __forceinline void iPOP( iRegister32 from ) { write8( 0x58 | from.Id ); }
static __forceinline void iPOP( void* from ) { iPOP( ptr[from] ); }
static __forceinline void POP( void* from ) { POP( ptr[from] ); }
static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); }
static __forceinline void iPUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
static __forceinline void iPUSH( iRegister32 from ) { write8( 0x50 | from.Id ); }
static __forceinline void iPUSH( void* from ) { iPUSH( ptr[from] ); }
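// Sketch (illustrative): per the note above, a former PUSHAD/POPAD pair is
// better spelled as explicit pushes and pops, saving only what's live:
//
//    iPUSH( eax ); iPUSH( ecx ); iPUSH( edx );   // caller-saved set only
//    // ... clobbering code ...
//    iPOP( edx ); iPOP( ecx ); iPOP( eax );      // restore in reverse order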
// ------------------------------------------------------------------------
using Internal::iADD;
using Internal::iOR;
using Internal::iADC;
using Internal::iSBB;
using Internal::iAND;
using Internal::iSUB;
using Internal::iXOR;
using Internal::iCMP;
// pushes the EFLAGS register onto the stack
static __forceinline void iPUSHFD() { write8( 0x9C ); }
// pops the EFLAGS register from the stack
static __forceinline void iPOPFD() { write8( 0x9D ); }
using Internal::iROL;
using Internal::iROR;
using Internal::iRCL;
using Internal::iRCR;
using Internal::iSHL;
using Internal::iSHR;
using Internal::iSAR;
// ----- Miscellaneous Instructions -----
// Various Instructions with no parameter and no special encoding logic.
using Internal::iMOVSX;
using Internal::iMOVZX;
__forceinline void iRET() { write8( 0xC3 ); }
__forceinline void iCBW() { write16( 0x9866 ); }
__forceinline void iCWD() { write8( 0x98 ); }
__forceinline void iCDQ() { write8( 0x99 ); }
__forceinline void iCWDE() { write8( 0x98 ); }
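// (encoding note: iCWD and iCWDE both emit 0x98 above; in 32-bit code 0x98
// decodes as CWDE, while a true CWD needs the operand-size prefix: 66 99.)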
__forceinline void iLAHF() { write8( 0x9f ); }
__forceinline void iSAHF() { write8( 0x9e ); }
__forceinline void iSTC() { write8( 0xF9 ); }
__forceinline void iCLC() { write8( 0xF8 ); }
// NOP 1-byte
__forceinline void iNOP() { write8(0x90); }
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instructions!
// ---------- 32 Bit Interface -----------
extern void iMOV( const x86Register32& to, const x86Register32& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register32& from );
extern void iMOV( const x86Register32& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register32& to, const void* src );
extern void iMOV( const void* dest, const x86Register32& from );
extern void iMOV( const iRegister32& to, const iRegister32& from );
extern void iMOV( const ModSibBase& sibdest, const iRegister32& from );
extern void iMOV( const iRegister32& to, const ModSibBase& sibsrc );
extern void iMOV( const iRegister32& to, const void* src );
extern void iMOV( void* dest, const iRegister32& from );
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
extern void iMOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
extern void iMOV( const iRegister32& to, u32 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm );
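// Sketch (illustrative): the preserve_flags switch only matters for a zero
// immediate, where the optimizer may substitute the shorter xor encoding:
//
//    iMOV( eax, 0 );          // may emit xor eax,eax (clobbers EFLAGS)
//    iMOV( eax, 0, true );    // forced to a literal mov eax,0 (flags intact)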
// ---------- 16 Bit Interface -----------
extern void iMOV( const x86Register16& to, const x86Register16& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register16& from );
extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register16& to, const void* src );
extern void iMOV( const void* dest, const x86Register16& from );
extern void iMOV( const iRegister16& to, const iRegister16& from );
extern void iMOV( const ModSibBase& sibdest, const iRegister16& from );
extern void iMOV( const iRegister16& to, const ModSibBase& sibsrc );
extern void iMOV( const iRegister16& to, const void* src );
extern void iMOV( void* dest, const iRegister16& from );
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
extern void iMOV( const iRegister16& to, u16 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm );
// ---------- 8 Bit Interface -----------
extern void iMOV( const x86Register8& to, const x86Register8& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register8& from );
extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register8& to, const void* src );
extern void iMOV( const void* dest, const x86Register8& from );
extern void iMOV( const iRegister8& to, const iRegister8& from );
extern void iMOV( const ModSibBase& sibdest, const iRegister8& from );
extern void iMOV( const iRegister8& to, const ModSibBase& sibsrc );
extern void iMOV( const iRegister8& to, const void* src );
extern void iMOV( void* dest, const iRegister8& from );
extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
extern void iMOV( const iRegister8& to, u8 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm );
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -35,9 +35,9 @@
using namespace x86Emitter;
template< int OperandSize >
static __forceinline x86Register<OperandSize> _reghlp( x86IntRegType src )
static __forceinline iRegister<OperandSize> _reghlp( x86IntRegType src )
{
return x86Register<OperandSize>( src );
return iRegister<OperandSize>( src );
}
static __forceinline ModSibBase _mrmhlp( x86IntRegType src )
@ -116,31 +116,34 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR )
DEFINE_OPCODE_LEGACY( MOV )
// ------------------------------------------------------------------------
#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \
emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \
emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \
emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); }
#define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \
emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \
emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \
emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[from] ); }
DEFINE_LEGACY_MOVEXTEND( S, 16 )
DEFINE_LEGACY_MOVEXTEND( Z, 16 )
DEFINE_LEGACY_MOVEXTEND( S, 8 )
DEFINE_LEGACY_MOVEXTEND( Z, 8 )
DEFINE_LEGACY_MOVEXTEND( SX, 32, 16 )
DEFINE_LEGACY_MOVEXTEND( ZX, 32, 16 )
DEFINE_LEGACY_MOVEXTEND( SX, 32, 8 )
DEFINE_LEGACY_MOVEXTEND( ZX, 32, 8 )
DEFINE_LEGACY_MOVEXTEND( SX, 16, 8 )
DEFINE_LEGACY_MOVEXTEND( ZX, 16, 8 )
// mov r32 to [r32<<scale+from2]
emitterT void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
iMOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( iRegister32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
iMOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( iRegister16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
iMOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( iRegister8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
// Special forms needed by the legacy emitter syntax:
@ -155,6 +158,11 @@ emitterT void AND32I8toM( uptr to, s8 from )
iAND( ptr8[to], from );
}
/* cmove r32 to r32*/
emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from )
{
iCMOVE( iRegister32(to), iRegister32(from) );
}
// Note: the 'to' field can either be a register or a special opcode extension specifier
@ -224,23 +232,6 @@ emitterT u32* J32Rel( int cc, u32 to )
return (u32*)( x86Ptr - 4 );
}
emitterT void CMOV32RtoR( int cc, int to, int from )
{
RexRB(0, to, from);
write8( 0x0F );
write8( cc );
ModRM( 3, to, from );
}
emitterT void CMOV32MtoR( int cc, int to, uptr from )
{
RexR(0, to);
write8( 0x0F );
write8( cc );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
////////////////////////////////////////////////////
emitterT void x86SetPtr( u8* ptr )
{
@ -308,213 +299,9 @@ emitterT void x86Align( int bytes )
/* IX86 instructions */
/********************/
emitterT void STC( void )
{
write8( 0xF9 );
}
emitterT void CLC( void )
{
write8( 0xF8 );
}
// NOP 1-byte
emitterT void NOP( void )
{
write8(0x90);
}
/* cmovbe r32 to r32 */
emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x46, to, from );
}
/* cmovbe m32 to r32*/
emitterT void CMOVBE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x46, to, from );
}
/* cmovb r32 to r32 */
emitterT void CMOVB32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x42, to, from );
}
/* cmovb m32 to r32*/
emitterT void CMOVB32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x42, to, from );
}
/* cmovae r32 to r32 */
emitterT void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x43, to, from );
}
/* cmovae m32 to r32*/
emitterT void CMOVAE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x43, to, from );
}
/* cmova r32 to r32 */
emitterT void CMOVA32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x47, to, from );
}
/* cmova m32 to r32*/
emitterT void CMOVA32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x47, to, from );
}
/* cmovo r32 to r32 */
emitterT void CMOVO32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x40, to, from );
}
/* cmovo m32 to r32 */
emitterT void CMOVO32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x40, to, from );
}
/* cmovp r32 to r32 */
emitterT void CMOVP32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4A, to, from );
}
/* cmovp m32 to r32 */
emitterT void CMOVP32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4A, to, from );
}
/* cmovs r32 to r32 */
emitterT void CMOVS32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x48, to, from );
}
/* cmovs m32 to r32 */
emitterT void CMOVS32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x48, to, from );
}
/* cmovno r32 to r32 */
emitterT void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x41, to, from );
}
/* cmovno m32 to r32 */
emitterT void CMOVNO32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x41, to, from );
}
/* cmovnp r32 to r32 */
emitterT void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4B, to, from );
}
/* cmovnp m32 to r32 */
emitterT void CMOVNP32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4B, to, from );
}
/* cmovns r32 to r32 */
emitterT void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x49, to, from );
}
/* cmovns m32 to r32 */
emitterT void CMOVNS32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x49, to, from );
}
/* cmovne r32 to r32 */
emitterT void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x45, to, from );
}
/* cmovne m32 to r32*/
emitterT void CMOVNE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x45, to, from );
}
/* cmove r32 to r32*/
emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x44, to, from );
}
/* cmove m32 to r32*/
emitterT void CMOVE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x44, to, from );
}
/* cmovg r32 to r32*/
emitterT void CMOVG32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4F, to, from );
}
/* cmovg m32 to r32*/
emitterT void CMOVG32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4F, to, from );
}
/* cmovge r32 to r32*/
emitterT void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4D, to, from );
}
/* cmovge m32 to r32*/
emitterT void CMOVGE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4D, to, from );
}
/* cmovl r32 to r32*/
emitterT void CMOVL32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4C, to, from );
}
/* cmovl m32 to r32*/
emitterT void CMOVL32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4C, to, from );
}
/* cmovle r32 to r32*/
emitterT void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from )
{
CMOV32RtoR( 0x4E, to, from );
}
/* cmovle m32 to r32*/
emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from )
{
CMOV32MtoR( 0x4E, to, from );
}
emitterT void STC( void ) { iSTC(); }
emitterT void CLC( void ) { iCLC(); }
emitterT void NOP( void ) { iNOP(); }
////////////////////////////////////
// arithmetic instructions /
@ -1173,34 +960,31 @@ emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); }
emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); }
/* push imm32 */
emitterT void PUSH32I( u32 from ) { PUSH( from ); }
emitterT void PUSH32I( u32 from ) { iPUSH( from ); }
/* push r32 */
emitterT void PUSH32R( x86IntRegType from ) { PUSH( x86Register32( from ) ); }
emitterT void PUSH32R( x86IntRegType from ) { iPUSH( iRegister32( from ) ); }
/* push m32 */
emitterT void PUSH32M( u32 from )
{
PUSH( ptr[from] );
iPUSH( ptr[from] );
}
/* pop r32 */
emitterT void POP32R( x86IntRegType from ) { POP( x86Register32( from ) ); }
/* pushfd */
emitterT void POP32R( x86IntRegType from ) { iPOP( iRegister32( from ) ); }
emitterT void PUSHFD( void ) { write8( 0x9C ); }
/* popfd */
emitterT void POPFD( void ) { write8( 0x9D ); }
emitterT void RET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); }
emitterT void RET( void ) { iRET(); }
emitterT void CBW( void ) { write16( 0x9866 ); }
emitterT void CWD( void ) { write8( 0x98 ); }
emitterT void CDQ( void ) { write8( 0x99 ); }
emitterT void CWDE() { write8(0x98); }
emitterT void CBW( void ) { iCBW(); }
emitterT void CWD( void ) { iCWD(); }
emitterT void CDQ( void ) { iCDQ(); }
emitterT void CWDE() { iCWDE(); }
emitterT void LAHF() { write8(0x9f); }
emitterT void SAHF() { write8(0x9e); }
emitterT void LAHF() { iLAHF(); }
emitterT void SAHF() { iSAHF(); }
emitterT void BT32ItoR( x86IntRegType to, u8 from )
{
@ -1230,34 +1014,34 @@ emitterT void BSWAP32R( x86IntRegType to )
emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset)
{
LEA( x86Register32( to ), ptr[x86IndexReg(from)+offset] );
iLEA( iRegister32( to ), ptr[x86IndexReg(from)+offset] );
}
emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
{
LEA( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
iLEA( iRegister32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
}
// Don't inline recursive functions
emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
{
LEA( x86Register32( to ), ptr[x86IndexReg(from)*(1<<scale)] );
iLEA( iRegister32( to ), ptr[x86IndexReg(from)*(1<<scale)] );
}
// to = from + offset
emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset)
{
LEA( x86Register16( to ), ptr[x86IndexReg(from)+offset] );
iLEA( iRegister16( to ), ptr[x86IndexReg(from)+offset] );
}
// to = from0 + from1
emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
{
LEA( x86Register16( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
iLEA( iRegister16( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
}
// to = from << scale (max is 3)
emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
{
LEA( x86Register16( to ), ptr[x86IndexReg(from)*(1<<scale)] );
iLEA( iRegister16( to ), ptr[x86IndexReg(from)*(1<<scale)] );
}

View File

@ -56,11 +56,7 @@ emitterT void ModRM( uint mod, uint reg, uint rm )
jASSUME( mod < 4 );
jASSUME( reg < 8 );
jASSUME( rm < 8 );
//write8( (mod << 6) | (reg << 3) | rm );
*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
x86Ptr++;
write8( (mod << 6) | (reg << 3) | rm );
}
emitterT void SibSB( uint ss, uint index, uint base )
@ -71,8 +67,5 @@ emitterT void SibSB( uint ss, uint index, uint base )
jASSUME( ss < 4 );
jASSUME( index < 8 );
jASSUME( base < 8 );
//write8( (ss << 6) | (index << 3) | base );
*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
x86Ptr++;
write8( (ss << 6) | (index << 3) | base );
}
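// Note on the reverted fast path (see the removed lines above): the old form
// stored a full u32 while advancing x86Ptr by only one byte, so three bytes
// past the current stream position were written speculatively and merely
// assumed to be overwritten by later emits. write8() touches exactly the byte
// being emitted, which is the safer contract for the code stream.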

View File

@ -37,21 +37,21 @@ static const bool AlwaysUseMovaps = true;
//------------------------------------------------------------------
#define SSEMtoR( code, overb ) \
assert( to < XMMREGS ), \
assert( to < iREGCNT_XMM ), \
RexR(0, to), \
write16( code ), \
ModRM( 0, to, DISP32 ), \
write32( MEMADDR(from, 4 + overb) )
#define SSERtoM( code, overb ) \
assert( from < XMMREGS), \
assert( from < iREGCNT_XMM), \
RexR(0, from), \
write16( code ), \
ModRM( 0, from, DISP32 ), \
write32( MEMADDR(to, 4 + overb) )
#define SSE_SS_MtoR( code, overb ) \
assert( to < XMMREGS ), \
assert( to < iREGCNT_XMM ), \
write8( 0xf3 ), \
RexR(0, to), \
write16( code ), \
@ -59,7 +59,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(from, 4 + overb) )
#define SSE_SS_RtoM( code, overb ) \
assert( from < XMMREGS), \
assert( from < iREGCNT_XMM), \
write8( 0xf3 ), \
RexR(0, from), \
write16( code ), \
@ -67,7 +67,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(to, 4 + overb) )
#define SSERtoR( code ) \
assert( to < XMMREGS && from < XMMREGS), \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
RexRB(0, to, from), \
write16( code ), \
ModRM( 3, to, from )
@ -85,21 +85,21 @@ static const bool AlwaysUseMovaps = true;
SSERtoR( code )
#define _SSERtoR66( code ) \
assert( to < XMMREGS && from < XMMREGS), \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
write8( 0x66 ), \
RexRB(0, from, to), \
write16( code ), \
ModRM( 3, from, to )
#define SSE_SS_RtoR( code ) \
assert( to < XMMREGS && from < XMMREGS), \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \
write8( 0xf3 ), \
RexRB(0, to, from), \
write16( code ), \
ModRM( 3, to, from )
#define SSE_SD_MtoR( code, overb ) \
assert( to < XMMREGS ) , \
assert( to < iREGCNT_XMM ) , \
write8( 0xf2 ), \
RexR(0, to), \
write16( code ), \
@ -107,7 +107,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(from, 4 + overb) ) \
#define SSE_SD_RtoM( code, overb ) \
assert( from < XMMREGS) , \
assert( from < iREGCNT_XMM) , \
write8( 0xf2 ), \
RexR(0, from), \
write16( code ), \
@ -115,7 +115,7 @@ static const bool AlwaysUseMovaps = true;
write32( MEMADDR(to, 4 + overb) ) \
#define SSE_SD_RtoR( code ) \
assert( to < XMMREGS && from < XMMREGS) , \
assert( to < iREGCNT_XMM && from < iREGCNT_XMM) , \
write8( 0xf2 ), \
RexRB(0, to, from), \
write16( code ), \

View File

@ -28,7 +28,7 @@ u8 g_globalMMXSaved = 0;
u8 g_globalXMMSaved = 0;
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*XMMREGS] );
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] );
/////////////////////////////////////////////////////////////////////

View File

@ -91,10 +91,10 @@ extern CPUINFO cpuinfo;
#define __threadlocal __thread
#endif
// x86 opcode descriptors
#define XMMREGS 8
#define X86REGS 8
#define MMXREGS 8
// Register counts for x86/32 mode:
static const uint iREGCNT_XMM = 8;
static const uint iREGCNT_GPR = 8;
static const uint iREGCNT_MMX = 8;
enum XMMSSEType
{
@ -104,10 +104,10 @@ enum XMMSSEType
};
extern __threadlocal u8 *x86Ptr;
extern __threadlocal u8 *j8Ptr[32];
extern __threadlocal u32 *j32Ptr[32];
extern __threadlocal u8 *j8Ptr[32]; // deprecated item. Use local u8* vars instead.
extern __threadlocal u32 *j32Ptr[32]; // deprecated item. Use local u32* vars instead.
extern __threadlocal XMMSSEType g_xmmtypes[XMMREGS];
extern __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM];
//------------------------------------------------------------------
// templated version of is_s8 is required, so that u16's get correct sign extension treatment.
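// Minimal sketch of the idea (the real definition lives elsewhere in the
// emitter headers; this is an assumption about its shape, not a quote):
//
//    template< typename T >
//    static __forceinline bool is_s8( T imm ) { return (s8)imm == imm; }
//
// Templating matters because a u16 argument squeezed through a fixed s32/u32
// parameter would get the wrong sign treatment before the test ever ran.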
@ -150,7 +150,7 @@ namespace x86Emitter
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
class x86AddressInfo;
class iAddressInfo;
class ModSibBase;
extern void iSetPtr( void* ptr );
@ -188,33 +188,33 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
//
template< int OperandSize >
class x86Register
class iRegister
{
public:
static const x86Register Empty; // defined as an empty/unused value (-1)
static const iRegister Empty; // defined as an empty/unused value (-1)
int Id;
x86Register( const x86Register<OperandSize>& src ) : Id( src.Id ) {}
x86Register(): Id( -1 ) {}
explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
iRegister( const iRegister<OperandSize>& src ) : Id( src.Id ) {}
iRegister(): Id( -1 ) {}
explicit iRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id < 0; }
// Returns true if the register is a valid accumulator: Eax, Ax, Al.
bool IsAccumulator() const { return Id == 0; }
bool operator==( const x86Register<OperandSize>& src ) const
bool operator==( const iRegister<OperandSize>& src ) const
{
return (Id == src.Id);
}
bool operator!=( const x86Register<OperandSize>& src ) const
bool operator!=( const iRegister<OperandSize>& src ) const
{
return (Id != src.Id);
}
x86Register<OperandSize>& operator=( const x86Register<OperandSize>& src )
iRegister<OperandSize>& operator=( const iRegister<OperandSize>& src )
{
Id = src.Id;
return *this;
@ -229,62 +229,62 @@ namespace x86Emitter
// all about the templated code in haphazard fashion. Yay.. >_<
//
typedef x86Register<4> x86Register32;
typedef x86Register<2> x86Register16;
typedef x86Register<1> x86Register8;
typedef iRegister<4> iRegister32;
typedef iRegister<2> iRegister16;
typedef iRegister<1> iRegister8;
extern const x86Register32 eax;
extern const x86Register32 ebx;
extern const x86Register32 ecx;
extern const x86Register32 edx;
extern const x86Register32 esi;
extern const x86Register32 edi;
extern const x86Register32 ebp;
extern const x86Register32 esp;
extern const iRegister32 eax;
extern const iRegister32 ebx;
extern const iRegister32 ecx;
extern const iRegister32 edx;
extern const iRegister32 esi;
extern const iRegister32 edi;
extern const iRegister32 ebp;
extern const iRegister32 esp;
extern const x86Register16 ax;
extern const x86Register16 bx;
extern const x86Register16 cx;
extern const x86Register16 dx;
extern const x86Register16 si;
extern const x86Register16 di;
extern const x86Register16 bp;
extern const x86Register16 sp;
extern const iRegister16 ax;
extern const iRegister16 bx;
extern const iRegister16 cx;
extern const iRegister16 dx;
extern const iRegister16 si;
extern const iRegister16 di;
extern const iRegister16 bp;
extern const iRegister16 sp;
extern const x86Register8 al;
extern const x86Register8 cl;
extern const x86Register8 dl;
extern const x86Register8 bl;
extern const x86Register8 ah;
extern const x86Register8 ch;
extern const x86Register8 dh;
extern const x86Register8 bh;
extern const iRegister8 al;
extern const iRegister8 cl;
extern const iRegister8 dl;
extern const iRegister8 bl;
extern const iRegister8 ah;
extern const iRegister8 ch;
extern const iRegister8 dh;
extern const iRegister8 bh;
//////////////////////////////////////////////////////////////////////////////////////////
// Use 32 bit registers as our index register (for ModSib memory address calculations)
// Only x86IndexReg provides operators for constructing x86AddressInfo types.
class x86IndexReg : public x86Register32
// Only x86IndexReg provides operators for constructing iAddressInfo types.
class x86IndexReg : public iRegister32
{
public:
static const x86IndexReg Empty; // defined as an empty/unused value (-1)
public:
x86IndexReg(): x86Register32() {}
x86IndexReg( const x86IndexReg& src ) : x86Register32( src.Id ) {}
x86IndexReg( const x86Register32& src ) : x86Register32( src ) {}
explicit x86IndexReg( int regId ) : x86Register32( regId ) {}
x86IndexReg(): iRegister32() {}
x86IndexReg( const x86IndexReg& src ) : iRegister32( src.Id ) {}
x86IndexReg( const iRegister32& src ) : iRegister32( src ) {}
explicit x86IndexReg( int regId ) : iRegister32( regId ) {}
// Returns true if the register is the stack pointer: ESP.
bool IsStackPointer() const { return Id == 4; }
x86AddressInfo operator+( const x86IndexReg& right ) const;
x86AddressInfo operator+( const x86AddressInfo& right ) const;
x86AddressInfo operator+( s32 right ) const;
iAddressInfo operator+( const x86IndexReg& right ) const;
iAddressInfo operator+( const iAddressInfo& right ) const;
iAddressInfo operator+( s32 right ) const;
x86AddressInfo operator*( u32 factor ) const;
x86AddressInfo operator<<( u32 shift ) const;
iAddressInfo operator*( u32 factor ) const;
iAddressInfo operator<<( u32 shift ) const;
x86IndexReg& operator=( const x86Register32& src )
x86IndexReg& operator=( const iRegister32& src )
{
Id = src.Id;
return *this;
@ -293,7 +293,7 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
//
class x86AddressInfo
class iAddressInfo
{
public:
x86IndexReg Base; // base register (no scale)
@ -302,7 +302,7 @@ namespace x86Emitter
s32 Displacement; // address displacement
public:
__forceinline x86AddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) :
__forceinline iAddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) :
Base( base ),
Index( index ),
Factor( factor ),
@ -310,7 +310,7 @@ namespace x86Emitter
{
}
__forceinline explicit x86AddressInfo( const x86IndexReg& base, int displacement=0 ) :
__forceinline explicit iAddressInfo( const x86IndexReg& base, int displacement=0 ) :
Base( base ),
Index(),
Factor(0),
@ -318,7 +318,7 @@ namespace x86Emitter
{
}
__forceinline explicit x86AddressInfo( s32 displacement ) :
__forceinline explicit iAddressInfo( s32 displacement ) :
Base(),
Index(),
Factor(0),
@ -326,24 +326,24 @@ namespace x86Emitter
{
}
static x86AddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 );
static iAddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 );
public:
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
__forceinline x86AddressInfo& Add( s32 imm )
__forceinline iAddressInfo& Add( s32 imm )
{
Displacement += imm;
return *this;
}
__forceinline x86AddressInfo& Add( const x86IndexReg& src );
__forceinline x86AddressInfo& Add( const x86AddressInfo& src );
__forceinline iAddressInfo& Add( const x86IndexReg& src );
__forceinline iAddressInfo& Add( const iAddressInfo& src );
__forceinline x86AddressInfo operator+( const x86IndexReg& right ) const { return x86AddressInfo( *this ).Add( right ); }
__forceinline x86AddressInfo operator+( const x86AddressInfo& right ) const { return x86AddressInfo( *this ).Add( right ); }
__forceinline x86AddressInfo operator+( s32 imm ) const { return x86AddressInfo( *this ).Add( imm ); }
__forceinline x86AddressInfo operator-( s32 imm ) const { return x86AddressInfo( *this ).Add( -imm ); }
__forceinline iAddressInfo operator+( const x86IndexReg& right ) const { return iAddressInfo( *this ).Add( right ); }
__forceinline iAddressInfo operator+( const iAddressInfo& right ) const { return iAddressInfo( *this ).Add( right ); }
__forceinline iAddressInfo operator+( s32 imm ) const { return iAddressInfo( *this ).Add( imm ); }
__forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); }
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -351,12 +351,12 @@ namespace x86Emitter
//
// This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means
// that the Base, Index, Scale, and Displacement values are all in the correct arrange-
// ments, and it serves as a type-safe layer between the x86Register's operators (which
// generate x86AddressInfo types) and the emitter's ModSib instruction forms. Without this,
// the x86Register would pass as a ModSib type implicitly, and that would cause ambiguity
// ments, and it serves as a type-safe layer between the iRegister's operators (which
// generate iAddressInfo types) and the emitter's ModSib instruction forms. Without this,
// the iRegister would pass as a ModSib type implicitly, and that would cause ambiguity
// on a number of instructions.
//
// End users should always use x86AddressInfo instead.
// End users should always use iAddressInfo instead.
//
class ModSibBase
{
@ -367,7 +367,7 @@ namespace x86Emitter
s32 Displacement; // offset applied to the Base/Index registers.
public:
explicit ModSibBase( const x86AddressInfo& src );
explicit ModSibBase( const iAddressInfo& src );
explicit ModSibBase( s32 disp );
ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 );
@ -394,7 +394,7 @@ namespace x86Emitter
class ModSibStrict : public ModSibBase
{
public:
__forceinline explicit ModSibStrict( const x86AddressInfo& src ) : ModSibBase( src ) {}
__forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {}
__forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {}
__forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) :
ModSibBase( base, index, scale, displacement ) {}
@ -423,7 +423,7 @@ namespace x86Emitter
return ModSibBase( src, x86IndexReg::Empty );
}
__forceinline ModSibBase operator[]( const x86AddressInfo& src ) const
__forceinline ModSibBase operator[]( const iAddressInfo& src ) const
{
return ModSibBase( src );
}
@ -457,7 +457,7 @@ namespace x86Emitter
return ModSibStrict<OperandSize>( src, x86IndexReg::Empty );
}
__forceinline ModSibStrict<OperandSize> operator[]( const x86AddressInfo& src ) const
__forceinline ModSibStrict<OperandSize> operator[]( const iAddressInfo& src ) const
{
return ModSibStrict<OperandSize>( src );
}
@ -598,387 +598,14 @@ namespace x86Emitter
{
extern void ModRM( uint mod, uint reg, uint rm );
extern void SibSB( u32 ss, u32 index, u32 base );
extern void iWriteDisp( int regfield, s32 displacement );
extern void iWriteDisp( int regfield, const void* address );
extern void EmitSibMagic( uint regfield, const ModSibBase& info );
struct SibMagic
{
static void Emit( uint regfield, const ModSibBase& info )
{
EmitSibMagic( regfield, info );
}
};
struct SibMagicInline
{
static __forceinline void Emit( uint regfield, const ModSibBase& info )
{
EmitSibMagic( regfield, info );
}
};
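// The two dispatchers above let the Impl templates below choose between an
// out-of-line and a force-inlined EmitSibMagic without duplicating any Emit
// bodies: the SibMagicType template parameter is the only difference between
// the m_32 and m_32i style typedefs used by the *ImplAll frontends.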
enum G1Type
{
G1Type_ADD=0,
G1Type_OR,
G1Type_ADC,
G1Type_SBB,
G1Type_AND,
G1Type_SUB,
G1Type_XOR,
G1Type_CMP
};
enum G2Type
{
G2Type_ROL=0,
G2Type_ROR,
G2Type_RCL,
G2Type_RCR,
G2Type_SHL,
G2Type_SHR,
G2Type_Unused,
G2Type_SAR
};
// -------------------------------------------------------------------
template< typename ImmType, G1Type InstType, typename SibMagicType >
class Group1Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group1Impl() {} // because GCC doesn't like static classes
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const ModSibBase& sibdest, const x86Register<OperandSize>& from )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
SibMagicType::Emit( from.Id, sibdest );
}
static __emitinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& sibsrc )
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
SibMagicType::Emit( to.Id, sibsrc );
}
static __emitinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
{
prefix16();
if( !Is8BitOperand() && is_s8( imm ) )
{
iWrite<u8>( 0x83 );
ModRM( 3, InstType, to.Id );
iWrite<s8>( imm );
}
else
{
if( to.IsAccumulator() )
iWrite<u8>( (Is8BitOperand() ? 4 : 5) | (InstType<<3) );
else
{
iWrite<u8>( Is8BitOperand() ? 0x80 : 0x81 );
ModRM( 3, InstType, to.Id );
}
iWrite<ImmType>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, ImmType imm )
{
if( Is8BitOperand() )
{
iWrite<u8>( 0x80 );
SibMagicType::Emit( InstType, sibdest );
iWrite<ImmType>( imm );
}
else
{
prefix16();
iWrite<u8>( is_s8( imm ) ? 0x83 : 0x81 );
SibMagicType::Emit( InstType, sibdest );
if( is_s8( imm ) )
iWrite<s8>( imm );
else
iWrite<ImmType>( imm );
}
}
};
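// Illustrative encodings produced by the immediate-size logic above, using
// ADD as the group-1 example:
//
//    iADD( ecx, 4 );       // 83 C1 04           -- sign-extended imm8 form
//    iADD( ecx, 0x400 );   // 81 C1 00 04 00 00  -- full imm32 form
//    iADD( eax, 0x400 );   // 05 00 04 00 00     -- accumulator short form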
// -------------------------------------------------------------------
// Group 2 (shift) instructions have no Sib/ModRM forms.
// Note: For Imm forms, we ignore the instruction if the shift count is zero. This
// is a safe optimization since any zero-value shift does not affect any flags.
//
template< typename ImmType, G2Type InstType, typename SibMagicType >
class Group2Impl
{
public:
static const uint OperandSize = sizeof(ImmType);
Group2Impl() {} // For the love of GCC.
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __emitinline void Emit( const x86Register<OperandSize>& to, const x86Register8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
ModRM( 3, InstType, to.Id );
}
static __emitinline void Emit( const x86Register<OperandSize>& to, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
ModRM( 3, InstType, to.Id );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
ModRM( 3, InstType, to.Id );
iWrite<u8>( imm );
}
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, const x86Register8& from )
{
jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?)
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
SibMagicType::Emit( from.Id, sibdest );
}
static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, u8 imm )
{
if( imm == 0 ) return;
prefix16();
if( imm == 1 )
{
// special encoding of 1's
iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
SibMagicType::Emit( InstType, sibdest );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
SibMagicType::Emit( InstType, sibdest );
iWrite<u8>( imm );
}
}
};
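// Illustrative consequence of the zero-count early-out above:
//
//    iSHL( eax, 0 );   // emits nothing -- stream and flags untouched
//    iSHL( eax, 1 );   // short "shift by 1" form: D1 E0
//    iSHL( eax, 3 );   // imm8 form: C1 E0 03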
// -------------------------------------------------------------------
//
template< G1Type InstType >
class Group1ImplAll
{
protected:
typedef Group1Impl<u32, InstType, SibMagic> m_32;
typedef Group1Impl<u16, InstType, SibMagic> m_16;
typedef Group1Impl<u8, InstType, SibMagic> m_8;
typedef Group1Impl<u32, InstType, SibMagicInline> m_32i;
typedef Group1Impl<u16, InstType, SibMagicInline> m_16i;
typedef Group1Impl<u8, InstType, SibMagicInline> m_8i;
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the
// discretion of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const x86Register32& to, const x86Register32& from ) const { m_32i::Emit( to, from ); }
__forceinline void operator()( const x86Register32& to, const void* src ) const { m_32i::Emit( to, ptr32[src] ); }
__forceinline void operator()( const void* dest, const x86Register32& from ) const { m_32i::Emit( ptr32[dest], from ); }
__noinline void operator()( const ModSibBase& sibdest, const x86Register32& from ) const { m_32::Emit( sibdest, from ); }
__noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const x86Register32& to, u32 imm, bool needs_flags=false ) const
{
//if( needs_flags || (imm != 0) || !_optimize_imm0() )
m_32i::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); }
__forceinline void operator()( const x86Register16& to, const void* src ) const { m_16i::Emit( to, ptr16[src] ); }
__forceinline void operator()( const void* dest, const x86Register16& from ) const { m_16i::Emit( ptr16[dest], from ); }
__noinline void operator()( const ModSibBase& sibdest, const x86Register16& from ) const { m_16::Emit( sibdest, from ); }
__noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const x86Register16& to, u16 imm, bool needs_flags=false ) const { m_16i::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); }
__forceinline void operator()( const x86Register8& to, const void* src ) const { m_8i::Emit( to, ptr8[src] ); }
__forceinline void operator()( const void* dest, const x86Register8& from ) const { m_8i::Emit( ptr8[dest], from ); }
__noinline void operator()( const ModSibBase& sibdest, const x86Register8& from ) const { m_8::Emit( sibdest, from ); }
__noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const x86Register8& to, u8 imm, bool needs_flags=false ) const { m_8i::Emit( to, imm ); }
Group1ImplAll() {} // Why does GCC need these?
};
// -------------------------------------------------------------------
//
template< G2Type InstType >
class Group2ImplAll
{
protected:
typedef Group2Impl<u32, InstType, SibMagic> m_32;
typedef Group2Impl<u16, InstType, SibMagic> m_16;
typedef Group2Impl<u8, InstType, SibMagic> m_8;
typedef Group2Impl<u32, InstType, SibMagicInline> m_32i;
typedef Group2Impl<u16, InstType, SibMagicInline> m_16i;
typedef Group2Impl<u8, InstType, SibMagicInline> m_8i;
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the
// discretion of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const{ m_32i::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, const x86Register8& from ) const{ m_32::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); }
void operator()( const x86Register32& to, u8 imm ) const { m_32i::Emit( to, imm ); }
// ---------- 16 Bit Interface -----------
__forceinline void operator()( const x86Register16& to, const x86Register8& from ) const{ m_16i::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, const x86Register8& from ) const{ m_16::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); }
void operator()( const x86Register16& to, u8 imm ) const { m_16i::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const{ m_8i::Emit( to, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ m_8::Emit( sibdest, from ); }
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }
Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds.
};
// Define the externals for Group1/2 instructions here (inside the Internal namespace),
// and then import them into the x86Emitter namespace later. Done because it saves a
// lot of Internal:: namespace resolution mess, and is better than the alternative of
// importing Internal into x86Emitter, which, done at the header file level, would
// defeat the purpose.
extern const Group1ImplAll<G1Type_ADD> iADD;
extern const Group1ImplAll<G1Type_OR> iOR;
extern const Group1ImplAll<G1Type_ADC> iADC;
extern const Group1ImplAll<G1Type_SBB> iSBB;
extern const Group1ImplAll<G1Type_AND> iAND;
extern const Group1ImplAll<G1Type_SUB> iSUB;
extern const Group1ImplAll<G1Type_XOR> iXOR;
extern const Group1ImplAll<G1Type_CMP> iCMP;
extern const Group2ImplAll<G2Type_ROL> iROL;
extern const Group2ImplAll<G2Type_ROR> iROR;
extern const Group2ImplAll<G2Type_RCL> iRCL;
extern const Group2ImplAll<G2Type_RCR> iRCR;
extern const Group2ImplAll<G2Type_SHL> iSHL;
extern const Group2ImplAll<G2Type_SHR> iSHR;
extern const Group2ImplAll<G2Type_SAR> iSAR;
//////////////////////////////////////////////////////////////////////////////////////////
// Mov with sign/zero extension implementations:
//
template< int DestOperandSize, int SrcOperandSize >
class MovExtendImpl
{
protected:
static bool Is8BitOperand() { return SrcOperandSize == 1; }
static void prefix16() { if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( bool SignExtend )
{
prefix16();
iWrite<u8>( 0x0f );
iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) );
}
public:
MovExtendImpl() {} // For the love of GCC.
static __emitinline void Emit( const x86Register<DestOperandSize>& to, const x86Register<SrcOperandSize>& from, bool SignExtend )
{
emit_base( SignExtend );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const x86Register<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend )
{
emit_base( SignExtend );
EmitSibMagic( to.Id, sibsrc );
}
};
// ------------------------------------------------------------------------
template< bool SignExtend >
class MovExtendImplAll
{
protected:
typedef MovExtendImpl<4, 2> m_16to32;
typedef MovExtendImpl<4, 1> m_8to32;
public:
__forceinline void operator()( const x86Register32& to, const x86Register16& from ) const { m_16to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const { m_8to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); }
MovExtendImplAll() {} // don't ask.
};
// ------------------------------------------------------------------------
extern const MovExtendImplAll<true> iMOVSX;
extern const MovExtendImplAll<false> iMOVZX;
#include "ix86_impl_group1.h"
#include "ix86_impl_group2.h"
#include "ix86_impl_movs.h"
// if the immediate is zero, we can replace the instruction, or ignore it
// entirely, depending on the instruction being issued. That's what we do here.
@ -1021,6 +648,63 @@ namespace x86Emitter
}*/
}
// ------------------------------------------------------------------------
// ----- Group 1 Instruction Class -----
extern const Internal::Group1ImplAll<Internal::G1Type_ADD> iADD;
extern const Internal::Group1ImplAll<Internal::G1Type_OR> iOR;
extern const Internal::Group1ImplAll<Internal::G1Type_ADC> iADC;
extern const Internal::Group1ImplAll<Internal::G1Type_SBB> iSBB;
extern const Internal::Group1ImplAll<Internal::G1Type_AND> iAND;
extern const Internal::Group1ImplAll<Internal::G1Type_SUB> iSUB;
extern const Internal::Group1ImplAll<Internal::G1Type_XOR> iXOR;
extern const Internal::Group1ImplAll<Internal::G1Type_CMP> iCMP;
// ----- Group 2 Instruction Class -----
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is
// zero. This is a safe optimization since any zero-value shift does not affect any
// flags.
extern const Internal::Group2ImplAll<Internal::G2Type_ROL> iROL;
extern const Internal::Group2ImplAll<Internal::G2Type_ROR> iROR;
extern const Internal::Group2ImplAll<Internal::G2Type_RCL> iRCL;
extern const Internal::Group2ImplAll<Internal::G2Type_RCR> iRCR;
extern const Internal::Group2ImplAll<Internal::G2Type_SHL> iSHL;
extern const Internal::Group2ImplAll<Internal::G2Type_SHR> iSHR;
extern const Internal::Group2ImplAll<Internal::G2Type_SAR> iSAR;
extern const Internal::MovExtendImplAll<true> iMOVSX;
extern const Internal::MovExtendImplAll<false> iMOVZX;
extern const Internal::CMovImplGeneric iCMOV;
extern const Internal::CMovImplAll<Jcc_Above> iCMOVA;
extern const Internal::CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
extern const Internal::CMovImplAll<Jcc_Below> iCMOVB;
extern const Internal::CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
extern const Internal::CMovImplAll<Jcc_Greater> iCMOVG;
extern const Internal::CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
extern const Internal::CMovImplAll<Jcc_Less> iCMOVL;
extern const Internal::CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
extern const Internal::CMovImplAll<Jcc_Zero> iCMOVZ;
extern const Internal::CMovImplAll<Jcc_Equal> iCMOVE;
extern const Internal::CMovImplAll<Jcc_NotZero> iCMOVNZ;
extern const Internal::CMovImplAll<Jcc_NotEqual> iCMOVNE;
extern const Internal::CMovImplAll<Jcc_Overflow> iCMOVO;
extern const Internal::CMovImplAll<Jcc_NotOverflow> iCMOVNO;
extern const Internal::CMovImplAll<Jcc_Carry> iCMOVC;
extern const Internal::CMovImplAll<Jcc_NotCarry> iCMOVNC;
extern const Internal::CMovImplAll<Jcc_Signed> iCMOVS;
extern const Internal::CMovImplAll<Jcc_Unsigned> iCMOVNS;
extern const Internal::CMovImplAll<Jcc_ParityEven> iCMOVPE;
extern const Internal::CMovImplAll<Jcc_ParityOdd> iCMOVPO;
}
#include "ix86_inlines.inl"