Many Emitter updates:

* added implementations for MOV and Shift instructions (SHL, SHR, ROL, ROR, etc).
 * Improved compiler optimization considerably by improving inlining selection in cases where constant propagation can be resolved reliably.
 * Moved lots of code around, so that the new emitter and the legacy emitter are more clearly separated; and renamed some vars.
 * Changed recompilers to initialize the recBlocks array to 0xcc instead of 0xcd (fills the blocks with the single-byte instruction INT3, which fixes the misalignment mess that would sometimes happen when using disasm views on the RecBlocks contents).
 * Switched back to /O2 (Optimize for Speed) instead of /Ox, since MSVC (for me) generally fails to optimize Thread-Local storage in /Ox mode.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@971 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-04-14 01:26:57 +00:00
parent 2711d14f0c
commit 6cceed6268
23 changed files with 3286 additions and 2639 deletions

View File

@ -517,8 +517,8 @@ void __fastcall vuMicroRead128(u32 addr,mem128_t* data)
data[1]=*(u64*)&vu->Micro[addr+8];
}
// [TODO] : Profile this code and see how often the VUs get written, and how
// often it changes the values being written (invoking a cpuClear).
// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
// frame in-game, and usually none at all after BIOS), so cpu clears aren't much of a big deal.
template<int vunum, bool dynrec>
void __fastcall vuMicroWrite8(u32 addr,mem8_t data)

View File

@ -217,7 +217,6 @@
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
EnableFiberSafeOptimizations="true"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="NDEBUG"
@ -2927,7 +2926,11 @@
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_group1.cpp"
RelativePath="..\..\x86\ix86\ix86_inlines.inl"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_instructions.h"
>
</File>
<File
@ -2938,6 +2941,18 @@
RelativePath="..\..\x86\ix86\ix86_legacy.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_instructions.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_internal.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_types.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_mmx.cpp"
>
@ -2959,6 +2974,10 @@
>
</File>
</Filter>
<File
RelativePath=".\ClassDiagram1.cd"
>
</File>
<File
RelativePath="..\..\Common.h"
>

View File

@ -538,7 +538,7 @@ void recResetIOP()
DevCon::Status( "iR3000A Resetting recompiler memory and structures" );
memset_8<0xcd,RECMEM_SIZE>( recMem );
memset_8<0xcc,RECMEM_SIZE>( recMem ); // 0xcc is INT3
iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));

View File

@ -799,17 +799,22 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c);
}
else {
// (this is one of my test cases for the new emitter --air)
using namespace x86Emitter;
if ( x86reg >= 0 ) {
if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset);
if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4);
if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8);
if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12);
x86IndexReg thisreg( x86reg );
if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000 );
if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000 );
if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000 );
if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000);
}
else {
if ( _X ) MOV32ItoM(offset, 0x00000000);
if ( _Y ) MOV32ItoM(offset+4, 0x00000000);
if ( _Z ) MOV32ItoM(offset+8, 0x00000000);
if ( _W ) MOV32ItoM(offset+12, 0x3f800000);
if ( _X ) MOV(ptr32[offset], 0x00000000);
if ( _Y ) MOV(ptr32[offset+4], 0x00000000);
if ( _Z ) MOV(ptr32[offset+8], 0x00000000);
if ( _W ) MOV(ptr32[offset+12], 0x3f800000);
}
}
return;

View File

@ -3569,7 +3569,7 @@ void recVUMI_BranchHandle()
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc);
MOV32ItoR(s_JumpX86, 0);
MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR
s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1;
if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) {
@ -3578,7 +3578,7 @@ void recVUMI_BranchHandle()
if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION )
MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8);
MOV32ItoR(s_JumpX86, 0);
MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR
s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1;
x86SetJ8( j8Ptr[ 1 ] );
@ -3815,7 +3815,7 @@ void recVUMI_B( VURegs* vuu, s32 info )
if( s_pCurBlock->blocks.size() > 1 ) {
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
MOV32ItoR(s_JumpX86, 0);
MOV32ItoR(s_JumpX86, 1);
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1;
s_UnconditionalDelay = 1;
}
@ -3841,7 +3841,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info )
if( s_pCurBlock->blocks.size() > 1 ) {
s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE);
MOV32ItoR(s_JumpX86, 0);
MOV32ItoR(s_JumpX86, 1);
s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1;
s_UnconditionalDelay = 1;
}

View File

@ -531,7 +531,7 @@ void recResetEE( void )
maxrecmem = 0;
memset_8<0xcd, REC_CACHEMEM>(recMem);
memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));

View File

@ -31,14 +31,24 @@ using namespace vtlb_private;
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
{
MOV32RmtoR(EAX,srcRm);
MOV32RtoRm(destRm,EAX);
MOV32RmtoR(EAX,srcRm,4);
MOV32RtoRm(destRm,EAX,4);
MOV32RmtoR(EAX,srcRm,8);
MOV32RtoRm(destRm,EAX,8);
MOV32RmtoR(EAX,srcRm,12);
MOV32RtoRm(destRm,EAX,12);
// (this is one of my test cases for the new emitter --air)
using namespace x86Emitter;
x86IndexReg src( srcRm );
x86IndexReg dest( destRm );
MOV( eax, ptr[src] );
MOV( ptr[dest], eax );
MOV( eax, ptr[src+4] );
MOV( ptr[dest+4], eax );
MOV( eax, ptr[src+8] );
MOV( ptr[dest+8], eax );
MOV( eax, ptr[src+12] );
MOV( ptr[dest+12], eax );
}
/*

View File

@ -15,13 +15,20 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* ix86 core v0.6.2
* Authors: linuzappz <linuzappz@pcsx.net>
* alexey silinov
* goldfinger
* zerofrog(@gmail.com)
* cottonvibes(@gmail.com)
* ix86 core v0.9.0
*
* Original Authors (v0.6.2 and prior):
* linuzappz <linuzappz@pcsx.net>
* alexey silinov
* goldfinger
* zerofrog(@gmail.com)
*
* Authors of v0.9.0:
* Jake.Stine(@gmail.com)
* cottonvibes(@gmail.com)
* sudonim(1@gmail.com)
*/
#include "PrecompiledHeader.h"
@ -29,310 +36,363 @@
#include "System.h"
#include "ix86_internal.h"
// ------------------------------------------------------------------------
// Notes on Thread Local Storage:
// * TLS is pretty simple, and "just works" from a programmer perspective, with only
// some minor additional computational overhead (see performance notes below).
//
// * MSVC and GCC handle TLS differently internally, but behavior to the programmer is
// generally identical.
//
// Performance Considerations:
// * GCC's implementation involves an extra dereference from normal storage.
//
// * MSVC's implementation involves *two* extra dereferences from normal storage because
// it has to look up the TLS heap pointer from the Windows Thread Storage Area. (in
// generated ASM code, this dereference is denoted by access to the fs:[2ch] address).
//
// * However, in either case, the optimizer usually optimizes it to a register so the
// extra overhead is minimal over a series of instructions. (Note!! the Full Opt-
// imization [/Ox] option effectively disables TLS optimizations in MSVC, causing
// generally significant code bloat).
//
__threadlocal u8 *x86Ptr;
__threadlocal u8 *j8Ptr[32];
__threadlocal u32 *j32Ptr[32];
PCSX2_ALIGNED16(u32 p[4]);
PCSX2_ALIGNED16(u32 p2[4]);
PCSX2_ALIGNED16(float f[4]);
XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT };
namespace x86Emitter {
const x86IndexerType ptr;
//////////////////////////////////////////////////////////////////////////////////////////
//
const x86Register32 x86Register32::Empty( -1 );
const x86Register32 eax( 0 );
const x86Register32 ebx( 3 );
const x86Register32 ecx( 1 );
const x86Register32 edx( 2 );
const x86Register32 esi( 6 );
const x86Register32 edi( 7 );
const x86Register32 ebp( 5 );
const x86Register32 esp( 4 );
const x86Register16 ax( 0 );
const x86Register16 bx( 3 );
const x86Register16 cx( 1 );
const x86Register16 dx( 2 );
const x86Register16 si( 6 );
const x86Register16 di( 7 );
const x86Register16 bp( 5 );
const x86Register16 sp( 4 );
const x86Register8 al( 0 );
const x86Register8 cl( 1 );
const x86Register8 dl( 2 );
const x86Register8 bl( 3 );
const x86Register8 ah( 4 );
const x86Register8 ch( 5 );
const x86Register8 dh( 6 );
const x86Register8 bh( 7 );
//////////////////////////////////////////////////////////////////////////////////////////
// x86Register Method Implementations
//
x86ModRm x86Register32::operator+( const x86Register32& right ) const
{
return x86ModRm( *this, right );
}
x86ModRm x86Register32::operator+( const x86ModRm& right ) const
{
return right + *this;
}
x86ModRm x86Register32::operator+( s32 right ) const
{
return x86ModRm( *this, right );
}
x86ModRm x86Register32::operator*( u32 right ) const
{
return x86ModRm( Empty, *this, right );
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86ModRm Method Implementations
//
x86ModRm& x86ModRm::Add( const x86IndexReg& src )
{
if( src == Index )
{
Factor++;
}
else if( src == Base )
{
// Compound the existing register reference into the Index/Scale pair.
Base = x86IndexReg::Empty;
if( src == Index )
Factor++;
else
{
jASSUME( Index.IsEmpty() ); // or die if we already have an index!
Index = src;
Factor = 2;
}
}
else if( Base.IsEmpty() )
Base = src;
else if( Index.IsEmpty() )
Index = src;
else
assert( false ); // oops, only 2 regs allowed per ModRm!
return *this;
}
x86ModRm& x86ModRm::Add( const x86ModRm& src )
{
Add( src.Base );
Add( src.Displacement );
// If the factor is 1, we can just treat index like a base register also.
if( src.Factor == 1 )
{
Add( src.Index );
}
else if( Index.IsEmpty() )
{
Index = src.Index;
Factor = 1;
}
else if( Index == src.Index )
Factor++;
else
assert( false ); // oops, only 2 regs allowed!
return *this;
}
//////////////////////////////////////////////////////////////////////////////////////////
// ModSib Method Implementations
//
const x86IndexerTypeExplicit<4> ptr32;
const x86IndexerTypeExplicit<2> ptr16;
const x86IndexerTypeExplicit<1> ptr8;
// ------------------------------------------------------------------------
// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values.
// Necessary because by default ModSib compounds registers into Index when possible.
//
void ModSib::Reduce()
const x86Register32 x86Register32::Empty;
const x86Register16 x86Register16::Empty;
const x86Register8 x86Register8::Empty;
const x86IndexReg x86IndexReg::Empty;
const x86Register32
eax( 0 ), ebx( 3 ),
ecx( 1 ), edx( 2 ),
esi( 6 ), edi( 7 ),
ebp( 5 ), esp( 4 );
const x86Register16
ax( 0 ), bx( 3 ),
cx( 1 ), dx( 2 ),
si( 6 ), di( 7 ),
bp( 5 ), sp( 4 );
const x86Register8
al( 0 ), cl( 1 ),
dl( 2 ), bl( 3 ),
ah( 4 ), ch( 5 ),
dh( 6 ), bh( 7 );
namespace Internal
{
// If no index reg, then load the base register into the index slot.
if( Index.IsEmpty() )
const Group1ImplAll<G1Type_ADD> ADD;
const Group1ImplAll<G1Type_OR> OR;
const Group1ImplAll<G1Type_ADC> ADC;
const Group1ImplAll<G1Type_SBB> SBB;
const Group1ImplAll<G1Type_AND> AND;
const Group1ImplAll<G1Type_SUB> SUB;
const Group1ImplAll<G1Type_XOR> XOR;
const Group1ImplAll<G1Type_CMP> CMP;
const Group2ImplAll<G2Type_ROL> ROL;
const Group2ImplAll<G2Type_ROR> ROR;
const Group2ImplAll<G2Type_RCL> RCL;
const Group2ImplAll<G2Type_RCR> RCR;
const Group2ImplAll<G2Type_SHL> SHL;
const Group2ImplAll<G2Type_SHR> SHR;
const Group2ImplAll<G2Type_SAR> SAR;
// Performance note: VC++ wants to use byte/word register form for the following
// ModRM/SibSB constructors if we use iWrite<u8>, and furthermore unrolls the
// the shift using a series of ADDs for the following results:
// add cl,cl
// add cl,cl
// add cl,cl
// or cl,bl
// add cl,cl
// ... etc.
//
// This is unquestionably bad optimization by Core2 standards, and generates tons of
// register aliases and false dependencies. (although may have been ideal for early-
// brand P4s with a broken barrel shifter?). The workaround is to do our own manual
// x86Ptr access and update using a u32 instead of u8. Thanks to little endianness,
// the same end result is achieved and no false dependencies are generated.
//
// (btw, I know this isn't a critical performance item by any means, but it's
// annoying simply because it *should* be an easy thing to optimize)
__forceinline void ModRM( uint mod, uint reg, uint rm )
{
*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
x86Ptr++;
}
__forceinline void SibSB( u32 ss, u32 index, u32 base )
{
*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
x86Ptr++;
}
// ------------------------------------------------------------------------
// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the
// instruction can be encoded as ModRm alone.
static __forceinline bool NeedsSibMagic( const ModSibBase& info )
{
// If base register is ESP, then we need a SIB:
if( info.Base.IsStackPointer() ) return true;
// no registers? no sibs!
// (ModSibBase::Reduce() ensures that stand-alone registers are stored in the Index slot)
if( info.Index.IsEmpty() ) return false;
// A scaled register needs a SIB
if( info.Scale != 0 ) return true;
// two registers needs a SIB
if( !info.Base.IsEmpty() ) return true;
return false;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Conditionally generates Sib encoding information!
//
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
__forceinline void EmitSibMagic( uint regfield, const ModSibBase& info )
{
jASSUME( regfield < 8 );
int displacement_size = (info.Displacement == 0) ? 0 :
( ( info.IsByteSizeDisp() ) ? 1 : 2 );
if( !NeedsSibMagic( info ) )
{
// Use ModRm-only encoding, with the rm field holding an index/base register, if
// one has been specified. If neither register is specified then use Disp32 form,
// which is encoded as "EBP w/o displacement" (which is why EBP must always be
// encoded *with* a displacement of 0, if it would otherwise not have one).
if( info.Index.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseDisp32 );
iWrite<u32>( info.Displacement );
return;
}
else
{
if( info.Index == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM( displacement_size, regfield, info.Index.Id );
}
}
else
{
// In order to encode "just" index*scale (and no base), we have to encode
// it as a special [index*scale + displacement] form, which is done by
// specifying EBP as the base register and setting the displacement field
// to zero. (same as ModRm w/o SIB form above, basically, except the
// ModRm_UseDisp flag is specified in the SIB instead of the ModRM field).
if( info.Base.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 );
iWrite<u32>( info.Displacement );
return;
}
else
{
if( info.Base == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM( displacement_size, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, info.Base.Id );
}
}
if( displacement_size != 0 )
{
*(u32*)x86Ptr = info.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
}
}
}
using namespace Internal;
/*
emitterT void x86SetPtr( u8* ptr )
{
x86Ptr = ptr;
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86Ptr Label API
//
class x86Label
{
public:
class Entry
{
protected:
u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type)
u8* m_base; // base address of the instruction (passed to the instruction)
int m_cc; // comparison type of the instruction
public:
explicit Entry( int cc ) :
m_base( x86Ptr )
, m_writebackpos( writebackidx )
{
}
void Commit( const u8* target ) const
{
//uptr reltarget = (uptr)m_base - (uptr)target;
//*((u32*)&m_base[m_writebackpos]) = reltarget;
jASSUME( m_emit != NULL );
jASSUME( m_base != NULL );
return m_emit( m_base, target, m_cc );
}
};
protected:
u8* m_target; // x86Ptr target address of this label
Entry m_writebacks[8];
int m_writeback_curpos;
public:
// creates a label list with no valid target.
// Use x86LabelList::Set() to set a target prior to class destruction.
x86Label() : m_target()
{
}
x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() )
{
Index = Base;
Scale = 0;
Base = x86IndexReg::Empty;
return;
}
// The Scale has a series of valid forms, all shown here:
// Performs all address writebacks on destruction.
virtual ~x86Label()
{
IssueWritebacks();
}
void SetTarget() { m_address = x86Ptr; }
void SetTarget( void* addr ) { m_address = (u8*)addr; }
void Clear()
{
m_writeback_curpos = 0;
}
switch( Scale )
// Adds a jump/call instruction to this label for writebacks.
void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc )
{
case 0: break;
case 1: Scale = 0; break;
case 2: Scale = 1; break;
case 3: // becomes [reg*2+reg]
jASSUME( Base.IsEmpty() );
Base = Index;
Scale = 1;
break;
case 4: Scale = 2; break;
case 5: // becomes [reg*4+reg]
jASSUME( Base.IsEmpty() );
Base = Index;
Scale = 2;
break;
case 6: // invalid!
assert( false );
break;
case 7: // so invalid!
assert( false );
break;
case 8: Scale = 3; break;
case 9: // becomes [reg*8+reg]
jASSUME( Base.IsEmpty() );
Base = Index;
Scale = 3;
break;
jASSUME( m_writeback_curpos < MaxWritebacks );
m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) );
m_writeback_curpos++;
}
}
ModSib::ModSib( const x86ModRm& src ) :
Base( src.Base ),
Index( src.Index ),
Scale( src.Factor ),
Displacement( src.Displacement )
{
Reduce();
}
ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) :
Base( base ),
Index( index ),
Scale( scale ),
Displacement( displacement )
{
Reduce();
}
ModSib::ModSib( s32 displacement ) :
Base(),
Index(),
Scale(0),
Displacement( displacement )
{
}
// ------------------------------------------------------------------------
// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the
// instruction can be encoded as ModRm alone.
bool NeedsSibMagic( const ModSib& info )
{
// no registers? no sibs!
if( info.Index.IsEmpty() ) return false;
// A scaled register needs a SIB
if( info.Scale != 0 ) return true;
// two registers needs a SIB
if( !info.Base.IsEmpty() ) return true;
// If index register is ESP, then we need a SIB:
// (the ModSib::Reduce() ensures that stand-alone ESP will be in the
// index position for us)
if( info.Index == esp ) return true;
return false;
}
// ------------------------------------------------------------------------
// Conditionally generates Sib encoding information!
//
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
void EmitSibMagic( int regfield, const ModSib& info )
{
int displacement_size = (info.Displacement == 0) ? 0 :
( ( info.IsByteSizeDisp() ) ? 1 : 2 );
if( !NeedsSibMagic( info ) )
void IssueWritebacks() const
{
// Use ModRm-only encoding, with the rm field holding an index/base register, if
// one has been specified. If neither register is specified then use Disp32 form,
// which is encoded as "EBP w/o displacement" (which is why EBP must always be
// encoded *with* a displacement of 0, if it would otherwise not have one).
if( info.Index.IsEmpty() )
ModRM( 0, regfield, ModRm_UseDisp32 );
else
const std::list<Entry>::const_iterator& start = m_list_writebacks.
for( ; start!=end; start++ )
{
if( info.Index == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
Entry& current = *start;
u8* donespot = current.Commit();
// Copy the data from the m_nextinst to the current location,
// and update any additional writebacks (but what about multiple labels?!?)
ModRM( displacement_size, regfield, info.Index.Id );
}
}
else
{
// In order to encode "just" index*scale (and no base), we have to encode
// it as a special [index*scale + displacement] form, which is done by
// specifying EBP as the base register and setting the displacement field
// to zero. (same as ModRm w/o SIB form above, basically, except the
// ModRm_UseDisp flag is specified in the SIB instead of the ModRM field).
};
#endif
if( info.Base.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 );
displacement_size = 2;
}
else
{
if( info.Base == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM( displacement_size, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, info.Base.Id );
}
}
switch( displacement_size )
{
case 0: break;
case 1: write8( info.Displacement ); break;
case 2: write32( info.Displacement ); break;
jNO_DEFAULT
}
void JMP( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJMP, 0 );
}
void JLE( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJCC, 0 );
}
void x86SetJ8( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
*j8 = (u8)jump;
}
void x86SetJ8A( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
if( ((uptr)x86Ptr&0xf) > 4 ) {
uptr newjump = jump + 16-((uptr)x86Ptr&0xf);
if( newjump <= 0x7f ) {
jump = newjump;
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
}
}
*j8 = (u8)jump;
}
emitterT void x86SetJ32( u32* j32 )
{
*j32 = ( x86Ptr - (u8*)j32 ) - 4;
}
emitterT void x86SetJ32A( u32* j32 )
{
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
x86SetJ32(j32);
}
emitterT void x86Align( int bytes )
{
// forward align
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
}
*/
// ------------------------------------------------------------------------
// Conditionally generates Sib encoding information!
// Internal implementation of EmitSibMagic which has been custom tailored
// to optimize special forms of the Lea instructions accordingly, such
// as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg".
//
// regfield - register field to be written to the ModRm. This is either a register specifier
// or an opcode extension. In either case, the instruction determines the value for us.
//
emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info )
{
EmitSibMagic( regfield.Id, info );
}
template< typename ToReg >
static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false )
static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false )
{
int displacement_size = (src.Displacement == 0) ? 0 :
( ( src.IsByteSizeDisp() ) ? 1 : 2 );
@ -348,17 +408,17 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false )
if( src.Index.IsEmpty() )
{
if( is16bit )
MOV16ItoR( to.Id, src.Displacement );
MOV( to, src.Displacement );
else
MOV32ItoR( to.Id, src.Displacement );
MOV( to, src.Displacement );
return;
}
else if( displacement_size == 0 )
{
if( is16bit )
MOV16RtoR( to.Id, src.Index.Id );
MOV( to, ToReg( src.Index.Id ) );
else
MOV32RtoR( to.Id, src.Index.Id );
MOV( to, ToReg( src.Index.Id ) );
return;
}
else
@ -366,7 +426,7 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false )
// note: no need to do ebp+0 check since we encode all 0 displacements as
// register assignments above (via MOV)
write8( 0x8d );
iWrite<u8>( 0x8d );
ModRM( displacement_size, to.Id, src.Index.Id );
}
}
@ -377,115 +437,236 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false )
if( displacement_size == 0 )
{
// Encode [Index*Scale] as a combination of Mov and Shl.
// This is more efficient because of the bloated format which requires
// a 32 bit displacement.
// This is more efficient because of the bloated LEA format which requires
// a 32 bit displacement, and the compact nature of the alternative.
//
// (this does not apply to older model P4s with the broken barrel shifter,
// but we currently aren't optimizing for that target anyway).
if( is16bit )
{
MOV16RtoR( to.Id, src.Index.Id );
SHL16ItoR( to.Id, src.Scale );
}
else
{
MOV32RtoR( to.Id, src.Index.Id );
SHL32ItoR( to.Id, src.Scale );
}
MOV( to, ToReg( src.Index.Id ) );
SHL( to, src.Scale );
return;
}
write8( 0x8d );
iWrite<u8>( 0x8d );
ModRM( 0, to.Id, ModRm_UseSib );
SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 );
displacement_size = 2; // force 32bit displacement.
iWrite<u32>( src.Displacement );
return;
}
else
{
if( src.Base == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
write8( 0x8d );
iWrite<u8>( 0x8d );
ModRM( displacement_size, to.Id, ModRm_UseSib );
SibSB( src.Scale, src.Index.Id, src.Base.Id );
/*switch( displacement_size )
{
case 0: break;
case 1: emit.write<u8>( src.Displacement ); break;
case 2: emit.write<u32>( src.Displacement ); break;
jNO_DEFAULT
}*/
}
}
switch( displacement_size )
{
case 0: break;
case 1: write8( src.Displacement ); break;
case 2: write32( src.Displacement ); break;
jNO_DEFAULT
}
if( displacement_size != 0 )
{
*(u32*)x86Ptr = src.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
}
}
emitterT void LEA32( x86Register32 to, const ModSib& src )
__emitinline void LEA( x86Register32 to, const ModSibBase& src )
{
EmitLeaMagic( to, src );
}
emitterT void LEA16( x86Register16 to, const ModSib& src )
__emitinline void LEA( x86Register16 to, const ModSibBase& src )
{
// fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form?
write8( 0x66 );
EmitLeaMagic( to, src );
}
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instruction Implementation
template< typename ImmType, typename SibMagicType >
class MovImpl
{
public:
static const uint OperandSize = sizeof(ImmType);
protected:
static bool Is8BitOperand() { return OperandSize == 1; }
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
static __forceinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
{
if( to == from ) return; // ignore redundant MOVs.
prefix16();
iWrite<u8>( Is8BitOperand() ? 0x88 : 0x89 );
ModRM( 3, from.Id, to.Id );
}
static __forceinline void Emit( const ModSibBase& dest, const x86Register<OperandSize>& from )
{
prefix16();
// mov eax has a special form when writing directly to a DISP32 address
// (sans any register index/base registers).
if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() )
{
iWrite<u8>( Is8BitOperand() ? 0xa2 : 0xa3 );
iWrite<u32>( dest.Displacement );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0x88 : 0x89 );
SibMagicType::Emit( from.Id, dest );
}
}
static __forceinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& src )
{
prefix16();
// mov eax has a special form when reading directly from a DISP32 address
// (sans any register index/base registers).
if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() )
{
iWrite<u8>( Is8BitOperand() ? 0xa0 : 0xa1 );
iWrite<u32>( src.Displacement );
}
else
{
iWrite<u8>( Is8BitOperand() ? 0x8a : 0x8b );
SibMagicType::Emit( to.Id, src );
}
}
static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
{
// Note: MOV does not have (reg16/32,imm8) forms.
if( imm == 0 )
XOR( to, to );
else
{
prefix16();
iWrite<u8>( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id );
iWrite<ImmType>( imm );
}
}
static __forceinline void Emit( ModSibStrict<OperandSize> dest, ImmType imm )
{
prefix16();
iWrite<u8>( Is8BitOperand() ? 0xc6 : 0xc7 );
SibMagicType::Emit( 0, dest );
iWrite<ImmType>( imm );
}
};
namespace Internal
{
typedef MovImpl<u32,SibMagic> MOV32;
typedef MovImpl<u16,SibMagic> MOV16;
typedef MovImpl<u8,SibMagic> MOV8;
typedef MovImpl<u32,SibMagicInline> MOV32i;
typedef MovImpl<u16,SibMagicInline> MOV16i;
typedef MovImpl<u8,SibMagicInline> MOV8i;
}
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
// cretion of the compiler.
//
// TODO : Turn this into a macro after it's been debugged and accuracy-approved! :D
// ---------- 32 Bit Interface -----------
__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
void MOV( const x86Register32& to, u32 imm ) { MOV32i::Emit( to, imm ); }
// ---------- 16 Bit Interface -----------
__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
void MOV( const x86Register16& to, u16 imm ) { MOV16i::Emit( to, imm ); }
// ---------- 8 Bit Interface -----------
__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
void MOV( const x86Register8& to, u8 imm ) { MOV8i::Emit( to, imm ); }
//////////////////////////////////////////////////////////////////////////////////////////
// Miscellaneous Section!
// Various Instructions with no parameter and no special encoding logic.
//
emitterT void RET() { write8( 0xC3 ); }
emitterT void CBW() { write16( 0x9866 ); }
emitterT void CWD() { write8( 0x98 ); }
emitterT void CDQ() { write8( 0x99 ); }
emitterT void CWDE() { write8( 0x98 ); }
__forceinline void RET() { write8( 0xC3 ); }
__forceinline void CBW() { write16( 0x9866 ); }
__forceinline void CWD() { write8( 0x98 ); }
__forceinline void CDQ() { write8( 0x99 ); }
__forceinline void CWDE() { write8( 0x98 ); }
emitterT void LAHF() { write8( 0x9f ); }
emitterT void SAHF() { write8( 0x9e ); }
__forceinline void LAHF() { write8( 0x9f ); }
__forceinline void SAHF() { write8( 0x9e ); }
//////////////////////////////////////////////////////////////////////////////////////////
// Push / Pop Emitters
//
// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu
// is running in. So even though these say push32, they would essentially be push64 on
// an x64 build. Should I rename them accordingly? --air
//
// Note: pushad/popad implementations are intentionally left out. The instructions are
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
emitterT void POP( x86Register32 from )
__forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); }
__emitinline void POP( const ModSibBase& from )
{
write8( 0x58 | from.Id );
iWrite<u8>( 0x8f ); Internal::EmitSibMagic( 0, from );
}
emitterT void POP( const ModSib& from )
{
write8( 0x8f ); EmitSibMagic( 0, from );
}
__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
__forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); }
emitterT void PUSH( u32 imm )
__emitinline void PUSH( const ModSibBase& from )
{
write8( 0x68 ); write32( imm );
}
emitterT void PUSH( x86Register32 from )
{
write8( 0x50 | from.Id );
}
emitterT void PUSH( const ModSib& from )
{
write8( 0xff ); EmitSibMagic( 6, from );
iWrite<u8>( 0xff ); Internal::EmitSibMagic( 6, from );
}
// pushes the EFLAGS register onto the stack
emitterT void PUSHFD() { write8( 0x9C ); }
__forceinline void PUSHFD() { write8( 0x9C ); }
// pops the EFLAGS register from the stack
emitterT void POPFD() { write8( 0x9D ); }
__forceinline void POPFD() { write8( 0x9D ); }
}

File diff suppressed because it is too large Load Diff

View File

@ -17,7 +17,7 @@
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
#include "ix86_legacy_internal.h"
//------------------------------------------------------------------
// 3DNOW instructions

View File

@ -18,9 +18,8 @@
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
#include "System.h"
#include "Threading.h"
#include "ix86_legacy_internal.h"
#include "RedtapeWindows.h"

View File

@ -17,7 +17,7 @@
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
#include "ix86_legacy_internal.h"
//------------------------------------------------------------------
// FPU instructions

View File

@ -0,0 +1,237 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* ix86 core v0.9.0
*
* Original Authors (v0.6.2 and prior):
* linuzappz <linuzappz@pcsx.net>
* alexey silinov
* goldfinger
* zerofrog(@gmail.com)
*
* Authors of v0.9.0:
* Jake.Stine(@gmail.com)
* cottonvibes(@gmail.com)
* sudonim(1@gmail.com)
*/
#pragma once
// This header module contains functions which, under most circumstances, inline
// nicely with constant propagation from the compiler, resulting in little or
// no actual codegen in the majority of emitter statements. (common forms include:
// RegToReg, PointerToReg, RegToPointer). These cannot be included in the class
// definitions in the .h file because of inter-dependencies with other classes.
// (score one for C++!!)
//
// In order for MSVC to work correctly with __forceinline on class members,
// however, we need to include these methods into all source files which might
// reference them. Without this MSVC generates linker errors. Or, in other words,
// global optimization fails to resolve the externals and junk.
// (score one for MSVC!)
namespace x86Emitter
{
//////////////////////////////////////////////////////////////////////////////////////////
// x86Register Method Implementations
//
// Combines two registers into a compound [base + index] style address form.
__forceinline x86AddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const
{
	return x86AddressInfo( *this, right );
}

// Appends this register onto an existing compound address form.
__forceinline x86AddressInfo x86IndexReg::operator+( const x86AddressInfo& right ) const
{
	return right + *this;
}

// Forms a [register + displacement] style address.
__forceinline x86AddressInfo x86IndexReg::operator+( s32 right ) const
{
	return x86AddressInfo( *this, right );
}

// Forms a scaled-index address ([reg * factor]) with no base register (Empty).
__forceinline x86AddressInfo x86IndexReg::operator*( u32 right ) const
{
	return x86AddressInfo( Empty, *this, right );
}

// Shift-style scaling: [reg << shift], expressed as a multiplicative (1<<shift) factor.
__forceinline x86AddressInfo x86IndexReg::operator<<( u32 shift ) const
{
	return x86AddressInfo( Empty, *this, 1<<shift );
}
//////////////////////////////////////////////////////////////////////////////////////////
// ModSib Method Implementations
//
// ------------------------------------------------------------------------
// Builds a ModSib from a compounded address form.  The raw Factor is stored into
// Scale; Reduce() then normalizes it into an encodable Base/Index/Scale triple.
__forceinline ModSibBase::ModSibBase( const x86AddressInfo& src ) :
	Base( src.Base ),
	Index( src.Index ),
	Scale( src.Factor ),
	Displacement( src.Displacement )
{
	Reduce();
}

// ------------------------------------------------------------------------
// Explicit-component form; scale is a raw multiplier here and is normalized
// to a SIB scale exponent by Reduce().
__forceinline ModSibBase::ModSibBase( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) :
	Base( base ),
	Index( index ),
	Scale( scale ),
	Displacement( displacement )
{
	Reduce();
}

// ------------------------------------------------------------------------
// Pure displacement form ([disp32]); no registers involved.
__forceinline ModSibBase::ModSibBase( s32 displacement ) :
	Base(),
	Index(),
	Scale(0),
	Displacement( displacement )
{
	// no reduction necessary :D
}
// ------------------------------------------------------------------------
// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values.
// Necessary because by default ModSib compounds registers into Index when possible.
//
// If the ModSib is in illegal form ([Base + Index*5] for example) then an assertion
// followed by an InvalidParameter Exception will be tossed around in haphazard
// fashion.
__forceinline void ModSibBase::Reduce()
{
	// If no index reg, then load the base register into the index slot.
	if( Index.IsEmpty() )
	{
		Index = Base;
		Scale = 0;
		Base = x86IndexReg::Empty;
		return;
	}

	// The Scale has a series of valid forms, all shown here:
	// Powers of two map directly to a SIB scale exponent (1->0, 2->1, 4->2, 8->3).
	// Scales of 3, 5, and 9 decompose into [reg*2/4/8 + reg], which requires the
	// Base slot to be free; anything else is unencodable and asserts.
	switch( Scale )
	{
		case 0: break;
		case 1: Scale = 0; break;
		case 2: Scale = 1; break;

		case 3: 			// becomes [reg*2+reg]
			jASSUME( Base.IsEmpty() );
			Base = Index;
			Scale = 1;
		break;

		case 4: Scale = 2; break;

		case 5: 			// becomes [reg*4+reg]
			jASSUME( Base.IsEmpty() );
			Base = Index;
			Scale = 2;
		break;

		case 6: 			// invalid!
			assert( false );
		break;

		case 7: 			// so invalid!
			assert( false );
		break;

		case 8: Scale = 3; break;
		case 9: 			// becomes [reg*8+reg]
			jASSUME( Base.IsEmpty() );
			Base = Index;
			Scale = 3;
		break;
	}

	if( Index.IsStackPointer() )
	{
		// esp cannot be encoded as the index, so move it to the Base, if possible.
		jASSUME( Scale == 0 );
		jASSUME( Base.IsEmpty() );
		Base = Index;

		// note: leave index assigned to esp also (generates correct encoding later)
		//Index = x86IndexReg::Empty;
	}
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86AddressInfo Method Implementations
//
// Adds a register to this address form.  A register already present in the
// Index slot bumps the scale factor; one present in the Base slot is compounded
// into Index/Scale.  Only two distinct registers fit a ModRM/SIB encoding, so a
// third trips an assertion.
//
// Fix: removed the dead `if( src == Index )` test inside the `src == Base`
// branch -- that branch is only reachable when the outer `src == Index` test
// already failed, so the inner check could never be true.
__forceinline x86AddressInfo& x86AddressInfo::Add( const x86IndexReg& src )
{
	if( src == Index )
	{
		// Register already occupies the Index slot: just raise the multiplier.
		Factor++;
	}
	else if( src == Base )
	{
		// Compound the existing register reference into the Index/Scale pair.
		// (src != Index here, so the Index slot must be free to receive it.)
		Base = x86IndexReg::Empty;
		jASSUME( Index.IsEmpty() );		// or die if we already have an index!
		Index = src;
		Factor = 2;
	}
	else if( Base.IsEmpty() )
		Base = src;
	else if( Index.IsEmpty() )
		Index = src;
	else
		assert( false );	// oops, only 2 regs allowed per ModRm!

	return *this;
}
// ------------------------------------------------------------------------
// Merges another compound address form into this one.  Base and Displacement
// are folded in first; the source's Index/Factor pair is then reconciled with
// our own.  Asserts if the merge would require more than two registers.
__forceinline x86AddressInfo& x86AddressInfo::Add( const x86AddressInfo& src )
{
	Add( src.Base );
	Add( src.Displacement );

	// If the factor is 1, we can just treat index like a base register also.
	if( src.Factor == 1 )
	{
		Add( src.Index );
	}
	else if( Index.IsEmpty() )
	{
		// NOTE(review): Factor is set to 1 here rather than src.Factor, which
		// appears to discard the source's scaling -- confirm whether scaled
		// (Factor > 1) sources can ever reach this path.
		Index = src.Index;
		Factor = 1;
	}
	else if( Index == src.Index )
		// NOTE(review): increments by one instead of adding src.Factor --
		// verify against callers that merge scaled forms.
		Factor++;
	else
		assert( false );	// oops, only 2 regs allowed!

	return *this;
}
}

View File

@ -0,0 +1,107 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* ix86 definitions v0.9.0
*
* Original Authors (v0.6.2 and prior):
* linuzappz <linuzappz@pcsx.net>
* alexey silinov
* goldfinger
* zerofrog(@gmail.com)
*
* Authors of v0.9.0:
* Jake.Stine(@gmail.com)
* cottonvibes(@gmail.com)
* sudonim(1@gmail.com)
*/
#pragma once
// Public declarations for the new (v0.9.0) emitter interface.  Implementations
// live in the emitter .cpp/.inl translation units.
namespace x86Emitter
{
	// ----- Lea Instructions (Load Effective Address) -----
	// Note: alternate (void*) forms of these instructions are not provided since those
	// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
	// instead.

	extern void LEA( x86Register32 to, const ModSibBase& src );
	extern void LEA( x86Register16 to, const ModSibBase& src );

	// ----- Push / Pop Instructions -----

	extern void POP( x86Register32 from );
	extern void POP( const ModSibBase& from );

	extern void PUSH( u32 imm );
	extern void PUSH( x86Register32 from );
	extern void PUSH( const ModSibBase& from );

	// Convenience forms: push/pop a direct memory location (wraps the raw
	// pointer into a ModSib operand).
	static __forceinline void POP( void* from ) { POP( ptr[from] ); }
	static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); }

	// ------------------------------------------------------------------------
	// Arithmetic and shift/rotate instruction implementations are defined in
	// the Internal namespace; re-export them as part of the public emitter API.

	using Internal::ADD;
	using Internal::OR;
	using Internal::ADC;
	using Internal::SBB;
	using Internal::AND;
	using Internal::SUB;
	using Internal::XOR;
	using Internal::CMP;

	using Internal::ROL;
	using Internal::ROR;
	using Internal::RCL;
	using Internal::RCR;
	using Internal::SHL;
	using Internal::SHR;
	using Internal::SAR;

	// MOV overload sets, one per operand size.  The ModSibStrict<N> forms are
	// for immediate-to-memory stores, where neither operand otherwise fixes
	// the operation size.

	// ---------- 32 Bit Interface -----------
	extern void MOV( const x86Register32& to, const x86Register32& from );
	extern void MOV( const ModSibBase& sibdest, const x86Register32& from );
	extern void MOV( const x86Register32& to, const ModSibBase& sibsrc );
	extern void MOV( const x86Register32& to, const void* src );
	extern void MOV( const void* dest, const x86Register32& from );
	extern void MOV( const x86Register32& to, u32 imm );
	extern void MOV( const ModSibStrict<4>& sibdest, u32 imm );

	// ---------- 16 Bit Interface -----------
	extern void MOV( const x86Register16& to, const x86Register16& from );
	extern void MOV( const ModSibBase& sibdest, const x86Register16& from );
	extern void MOV( const x86Register16& to, const ModSibBase& sibsrc );
	extern void MOV( const x86Register16& to, const void* src );
	extern void MOV( const void* dest, const x86Register16& from );
	extern void MOV( const x86Register16& to, u16 imm );
	extern void MOV( const ModSibStrict<2>& sibdest, u16 imm );

	// ---------- 8 Bit Interface -----------
	extern void MOV( const x86Register8& to, const x86Register8& from );
	extern void MOV( const ModSibBase& sibdest, const x86Register8& from );
	extern void MOV( const x86Register8& to, const ModSibBase& sibsrc );
	extern void MOV( const x86Register8& to, const void* src );
	extern void MOV( const void* dest, const x86Register8& from );
	extern void MOV( const x86Register8& to, u8 imm );
	extern void MOV( const ModSibStrict<1>& sibdest, u8 imm );
}

View File

@ -1,43 +1,22 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#include "ix86.h"
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define MEMADDR(addr, oplen) (addr)
#define Rex(w,r,x,b) assert(0)
#define RexR(w, reg) assert( !(w || (reg)>=8) )
#define RexB(w, base) assert( !(w || (base)>=8) )
#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) )
#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) )
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask))
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
//------------------------------------------------------------------
// General Emitter Helper functions
//------------------------------------------------------------------
namespace x86Emitter
{
extern void EmitSibMagic( int regfield, const ModSib& info );
extern void EmitSibMagic( x86Register32 regfield, const ModSib& info );
extern bool NeedsSibMagic( const ModSib& info );
}
// From here out are the legacy (old) emitter functions...
extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
extern void ModRM( int mod, int reg, int rm );
extern void SibSB( int ss, int index, int base );
extern void SET8R( int cc, int to );
extern u8* J8Rel( int cc, int to );
extern u32* J32Rel( int cc, u32 to );
extern u64 GetCPUTick( void );
//------------------------------------------------------------------
#include "ix86_types.h"
#include "ix86_instructions.h"

View File

@ -30,10 +30,119 @@
#include "PrecompiledHeader.h"
#include "System.h"
#include "ix86_internal.h"
#include "ix86_legacy_internal.h"
using namespace x86Emitter;
// Converts a legacy integer register ID into a strongly-typed register of the
// given operand size (in bytes: 1, 2, or 4).
template< int OperandSize >
static __forceinline x86Register<OperandSize> _reghlp( x86IntRegType src )
{
	return x86Register<OperandSize>( src );
}

// Wraps a legacy register ID as a plain [reg] indirect memory operand.
static __forceinline ModSibBase _mrmhlp( x86IntRegType src )
{
	return ptr[_reghlp<4>(src)];
}

// Builds a size-strict [reg] memory operand (no base) from a legacy register ID.
template< int OperandSize >
static __forceinline ModSibStrict<OperandSize> _mhlp( x86IntRegType src )
{
	return ModSibStrict<OperandSize>( x86IndexReg::Empty, x86IndexReg(src) );
}

// Builds a size-strict [src2 + src1] memory operand from two legacy register IDs.
template< int OperandSize >
static __forceinline ModSibStrict<OperandSize> _mhlp2( x86IntRegType src1, x86IntRegType src2 )
{
	return ModSibStrict<OperandSize>( x86IndexReg(src2), x86IndexReg(src1) );
}
#define DEFINE_LEGACY_HELPER( cod, bits ) \
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp<bits/8>(to), imm ); } \
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp<bits/8>(to), (void*)from ); } \
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \
{ cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \
{ cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); }
#define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \
emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp<bits/8>(to), cl ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp<bits/8>(to), imm ); } \
emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \
emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \
emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp<bits/8>(to) + offset, cl ); }
//emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \
// { cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
#define DEFINE_OPCODE_LEGACY( cod ) \
DEFINE_LEGACY_HELPER( cod, 32 ) \
DEFINE_LEGACY_HELPER( cod, 16 ) \
DEFINE_LEGACY_HELPER( cod, 8 )
#define DEFINE_OPCODE_SHIFT_LEGACY( cod ) \
DEFINE_LEGACY_SHIFT_HELPER( cod, 32 ) \
DEFINE_LEGACY_SHIFT_HELPER( cod, 16 ) \
DEFINE_LEGACY_SHIFT_HELPER( cod, 8 )
//////////////////////////////////////////////////////////////////////////////////////////
//
DEFINE_OPCODE_LEGACY( ADD )
DEFINE_OPCODE_LEGACY( CMP )
DEFINE_OPCODE_LEGACY( OR )
DEFINE_OPCODE_LEGACY( ADC )
DEFINE_OPCODE_LEGACY( SBB )
DEFINE_OPCODE_LEGACY( AND )
DEFINE_OPCODE_LEGACY( SUB )
DEFINE_OPCODE_LEGACY( XOR )
DEFINE_OPCODE_SHIFT_LEGACY( ROL )
DEFINE_OPCODE_SHIFT_LEGACY( ROR )
DEFINE_OPCODE_SHIFT_LEGACY( RCL )
DEFINE_OPCODE_SHIFT_LEGACY( RCR )
DEFINE_OPCODE_SHIFT_LEGACY( SHL )
DEFINE_OPCODE_SHIFT_LEGACY( SHR )
DEFINE_OPCODE_SHIFT_LEGACY( SAR )
DEFINE_OPCODE_LEGACY( MOV )
// mov r32 to [r32<<scale+from2]
// Legacy shim: mov [from1<<scale + from2] to r32, routed through the new emitter.
emitterT void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
	MOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}

// Legacy shim: mov [from1<<scale + from2] to r16.
emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
	MOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}

// Legacy shim: mov [from1<<scale + from2] to r8.
emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
	MOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}

// Special forms needed by the legacy emitter syntax:

// AND signed 8-bit immediate into r32.
emitterT void AND32I8toR( x86IntRegType to, s8 from )
{
	AND( _reghlp<4>(to), from );
}

// AND signed 8-bit immediate into m8.
emitterT void AND32I8toM( uptr to, s8 from )
{
	AND( ptr8[to], from );
}
// Note: the 'to' field can either be a register or a special opcode extension specifier
// depending on the opcode's encoding.
@ -70,16 +179,6 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset
}
}
emitterT void ModRM( s32 mod, s32 reg, s32 rm )
{
write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) );
}
emitterT void SibSB( s32 ss, s32 index, s32 base )
{
write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) );
}
emitterT void SET8R( int cc, int to )
{
RexB(0, to);
@ -191,43 +290,6 @@ emitterT void x86Align( int bytes )
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
}
////////////////////////////////////////////////////
// Generates executable code to align to the given alignment (could be useful for the second leg
// of if/else conditionals, which usually fall through a jump target label).
//
// Note: Left in for now just in case, but usefulness is moot. Only K8's and older (non-Prescott)
// P4s benefit from this, and we don't optimize for those platforms anyway.
//
void x86AlignExecutable( int align )
{
uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 );
uptr bytes = ( newx86 - (uptr)x86Ptr );
switch( bytes )
{
case 0: break;
case 1: NOP(); break;
case 2: MOV32RtoR( ESI, ESI ); break;
case 3: write8(0x08D); write8(0x024); write8(0x024); break;
case 5: NOP(); // falls through to 4...
case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break;
case 6: write8(0x08D); write8(0x0B6); write32(0); break;
case 8: NOP(); // falls through to 7...
case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break;
default:
{
// for larger alignments, just use a JMP...
u8* aligned_target = JMP8(0);
x86Ptr = (u8*)newx86;
x86SetJ8( aligned_target );
}
}
jASSUME( x86Ptr == (u8*)newx86 );
}
/********************/
/* IX86 instructions */
/********************/
@ -249,281 +311,6 @@ emitterT void NOP( void )
}
////////////////////////////////////
// mov instructions /
////////////////////////////////////
/* mov r32 to r32 */
emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from )
{
if( to == from ) return;
RexRB(0, from, to);
write8( 0x89 );
ModRM( 3, from, to );
}
/* mov r32 to m32 */
emitterT void MOV32RtoM( uptr to, x86IntRegType from )
{
RexR(0, from);
if (from == EAX) {
write8(0xA3);
} else {
write8( 0x89 );
ModRM( 0, from, DISP32 );
}
write32( MEMADDR(to, 4) );
}
/* mov m32 to r32 */
emitterT void MOV32MtoR( x86IntRegType to, uptr from )
{
RexR(0, to);
if (to == EAX) {
write8(0xA1);
} else {
write8( 0x8B );
ModRM( 0, to, DISP32 );
}
write32( MEMADDR(from, 4) );
}
emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset )
{
RexRB(0, to, from);
write8( 0x8B );
WriteRmOffsetFrom(to, from, offset);
}
/* mov [r32+r32*scale] to r32 */
emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
RexRXB(0,to,from2,from);
write8( 0x8B );
ModRM( 0, to, 0x4 );
SibSB(scale, from2, from );
}
// mov r32 to [r32<<scale+from2]
emitterT void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int from2, int scale )
{
RexRXB(0,to,from1,0);
write8( 0x8B );
ModRM( 0, to, 0x4 );
ModRM( scale, from1, 5);
write32(from2);
}
/* mov r32 to [r32][r32*scale] */
emitterT void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
RexRXB(0, to, from2, from);
write8( 0x89 );
ModRM( 0, to, 0x4 );
SibSB(scale, from2, from );
}
/* mov imm32 to r32 */
emitterT void MOV32ItoR( x86IntRegType to, u32 from )
{
RexB(0, to);
write8( 0xB8 | (to & 0x7) );
write32( from );
}
/* mov imm32 to m32 */
emitterT void MOV32ItoM(uptr to, u32 from )
{
write8( 0xC7 );
ModRM( 0, 0, DISP32 );
write32( MEMADDR(to, 8) );
write32( from );
}
// mov imm32 to [r32+off]
emitterT void MOV32ItoRm( x86IntRegType to, u32 from, int offset)
{
RexB(0,to);
write8( 0xC7 );
WriteRmOffsetFrom(0, to, offset);
write32(from);
}
// mov r32 to [r32+off]
emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset)
{
RexRB(0,from,to);
write8( 0x89 );
WriteRmOffsetFrom(from, to, offset);
}
/* mov r16 to r16 */
emitterT void MOV16RtoR( x86IntRegType to, x86IntRegType from )
{
if( to == from ) return;
write8( 0x66 );
RexRB(0, from, to);
write8( 0x89 );
ModRM( 3, from, to );
}
/* mov r16 to m16 */
emitterT void MOV16RtoM(uptr to, x86IntRegType from )
{
write8( 0x66 );
RexR(0,from);
write8( 0x89 );
ModRM( 0, from, DISP32 );
write32( MEMADDR(to, 4) );
}
/* mov m16 to r16 */
emitterT void MOV16MtoR( x86IntRegType to, uptr from )
{
write8( 0x66 );
RexR(0,to);
write8( 0x8B );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void MOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset )
{
write8( 0x66 );
RexRB(0,to,from);
write8( 0x8B );
WriteRmOffsetFrom(to, from, offset);
}
emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale )
{
write8(0x66);
RexRXB(0,to,from1,0);
write8( 0x8B );
ModRM( 0, to, SIB );
SibSB( scale, from1, SIBDISP);
write32(from2);
}
/* mov imm16 to m16 */
emitterT void MOV16ItoM( uptr to, u16 from )
{
write8( 0x66 );
write8( 0xC7 );
ModRM( 0, 0, DISP32 );
write32( MEMADDR(to, 6) );
write16( from );
}
/* mov r16 to [r32][r32*scale] */
emitterT void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
write8( 0x66 );
RexRXB(0,to,from2,from);
write8( 0x89 );
ModRM( 0, to, 0x4 );
SibSB(scale, from2, from );
}
emitterT void MOV16ItoR( x86IntRegType to, u16 from )
{
RexB(0, to);
write16( 0xB866 | ((to & 0x7)<<8) );
write16( from );
}
// mov imm16 to [r16+off]
emitterT void MOV16ItoRm( x86IntRegType to, u16 from, u32 offset=0 )
{
write8(0x66);
RexB(0,to);
write8( 0xC7 );
WriteRmOffsetFrom(0, to, offset);
write16(from);
}
// mov r16 to [r16+off]
emitterT void MOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset )
{
write8(0x66);
RexRB(0,from,to);
write8( 0x89 );
WriteRmOffsetFrom(from, to, offset);
}
/* mov r8 to m8 */
emitterT void MOV8RtoM( uptr to, x86IntRegType from )
{
RexR(0,from);
write8( 0x88 );
ModRM( 0, from, DISP32 );
write32( MEMADDR(to, 4) );
}
/* mov m8 to r8 */
emitterT void MOV8MtoR( x86IntRegType to, uptr from )
{
RexR(0,to);
write8( 0x8A );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
emitterT void MOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset)
{
RexRB(0,to,from);
write8( 0x8A );
WriteRmOffsetFrom(to, from, offset);
}
emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale )
{
RexRXB(0,to,from1,0);
write8( 0x8A );
ModRM( 0, to, SIB );
SibSB( scale, from1, SIBDISP);
write32(from2);
}
/* mov imm8 to m8 */
emitterT void MOV8ItoM( uptr to, u8 from )
{
write8( 0xC6 );
ModRM( 0, 0, DISP32 );
write32( MEMADDR(to, 5) );
write8( from );
}
// mov imm8 to r8
emitterT void MOV8ItoR( x86IntRegType to, u8 from )
{
RexB(0, to);
write8( 0xB0 | (to & 0x7) );
write8( from );
}
// mov imm8 to [r8+off]
emitterT void MOV8ItoRm( x86IntRegType to, u8 from, int offset)
{
assert( to != ESP );
RexB(0,to);
write8( 0xC6 );
WriteRmOffsetFrom(0, to,offset);
write8(from);
}
// mov r8 to [r8+off]
emitterT void MOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset)
{
assert( to != ESP );
RexRB(0,from,to);
write8( 0x88 );
WriteRmOffsetFrom(from,to,offset);
}
/* movsx r8 to r32 */
emitterT void MOVSX32R8toR( x86IntRegType to, x86IntRegType from )
{
@ -948,253 +735,6 @@ emitterT void IDIV32M( u32 from )
write32( MEMADDR(from, 4) );
}
////////////////////////////////////
// shifting instructions /
////////////////////////////////////
/* shl imm8 to r32 */
emitterT void SHL32ItoR( x86IntRegType to, u8 from )
{
RexB(0, to);
if ( from == 1 )
{
write8( 0xD1 );
write8( 0xE0 | (to & 0x7) );
return;
}
write8( 0xC1 );
ModRM( 3, 4, to );
write8( from );
}
/* shl imm8 to m32 */
emitterT void SHL32ItoM( uptr to, u8 from )
{
if ( from == 1 )
{
write8( 0xD1 );
ModRM( 0, 4, DISP32 );
write32( MEMADDR(to, 4) );
}
else
{
write8( 0xC1 );
ModRM( 0, 4, DISP32 );
write32( MEMADDR(to, 5) );
write8( from );
}
}
/* shl cl to r32 */
emitterT void SHL32CLtoR( x86IntRegType to )
{
RexB(0,to);
write8( 0xD3 );
ModRM( 3, 4, to );
}
// shl imm8 to r16
emitterT void SHL16ItoR( x86IntRegType to, u8 from )
{
write8(0x66);
RexB(0,to);
if ( from == 1 )
{
write8( 0xD1 );
write8( 0xE0 | (to & 0x7) );
return;
}
write8( 0xC1 );
ModRM( 3, 4, to );
write8( from );
}
// shl imm8 to r8
emitterT void SHL8ItoR( x86IntRegType to, u8 from )
{
RexB(0,to);
if ( from == 1 )
{
write8( 0xD0 );
write8( 0xE0 | (to & 0x7) );
return;
}
write8( 0xC0 );
ModRM( 3, 4, to );
write8( from );
}
/* shr imm8 to r32 */
emitterT void SHR32ItoR( x86IntRegType to, u8 from ) {
RexB(0,to);
if ( from == 1 )
{
write8( 0xD1 );
write8( 0xE8 | (to & 0x7) );
}
else
{
write8( 0xC1 );
ModRM( 3, 5, to );
write8( from );
}
}
/* shr imm8 to m32 */
emitterT void SHR32ItoM( uptr to, u8 from )
{
if ( from == 1 )
{
write8( 0xD1 );
ModRM( 0, 5, DISP32 );
write32( MEMADDR(to, 4) );
}
else
{
write8( 0xC1 );
ModRM( 0, 5, DISP32 );
write32( MEMADDR(to, 5) );
write8( from );
}
}
/* shr cl to r32 */
emitterT void SHR32CLtoR( x86IntRegType to )
{
RexB(0,to);
write8( 0xD3 );
ModRM( 3, 5, to );
}
// shr imm8 to r16
emitterT void SHR16ItoR( x86IntRegType to, u8 from )
{
RexB(0,to);
if ( from == 1 )
{
write8( 0xD1 );
ModRM( 3, 5, to );
}
else
{
write8( 0xC1 );
ModRM( 3, 5, to );
write8( from );
}
}
// shr imm8 to r8
emitterT void SHR8ItoR( x86IntRegType to, u8 from )
{
RexB(0,to);
if ( from == 1 )
{
write8( 0xD0 );
write8( 0xE8 | (to & 0x7) );
}
else
{
write8( 0xC0 );
ModRM( 3, 5, to );
write8( from );
}
}
/* sar imm8 to r32 */
emitterT void SAR32ItoR( x86IntRegType to, u8 from )
{
RexB(0,to);
if ( from == 1 )
{
write8( 0xD1 );
ModRM( 3, 7, to );
return;
}
write8( 0xC1 );
ModRM( 3, 7, to );
write8( from );
}
/* sar imm8 to m32 */
emitterT void SAR32ItoM( uptr to, u8 from )
{
write8( 0xC1 );
ModRM( 0, 7, DISP32 );
write32( MEMADDR(to, 5) );
write8( from );
}
/* sar cl to r32 */
emitterT void SAR32CLtoR( x86IntRegType to )
{
RexB(0,to);
write8( 0xD3 );
ModRM( 3, 7, to );
}
// sar imm8 to r16
emitterT void SAR16ItoR( x86IntRegType to, u8 from )
{
write8(0x66);
RexB(0,to);
if ( from == 1 )
{
write8( 0xD1 );
ModRM( 3, 7, to );
return;
}
write8( 0xC1 );
ModRM( 3, 7, to );
write8( from );
}
/*emitterT void ROR32ItoR( x86IntRegType to,u8 from )
{
RexB(0,to);
if ( from == 1 ) {
write8( 0xd1 );
write8( 0xc8 | to );
}
else
{
write8( 0xc1 );
write8( 0xc8 | to );
write8( from );
}
}*/
emitterT void RCR32ItoR( x86IntRegType to, u8 from )
{
RexB(0,to);
if ( from == 1 ) {
write8( 0xd1 );
ModRM(3, 3, to);
}
else
{
write8( 0xc1 );
ModRM(3, 3, to);
write8( from );
}
}
emitterT void RCR32ItoM( uptr to, u8 from )
{
RexB(0,to);
if ( from == 1 ) {
write8( 0xd1 );
ModRM( 0, 3, DISP32 );
write32( MEMADDR(to, 8) );
}
else
{
write8( 0xc1 );
ModRM( 0, 3, DISP32 );
write32( MEMADDR(to, 8) );
write8( from );
}
}
// shld imm8 to r32
emitterT void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift )
{
@ -1773,34 +1313,34 @@ emitterT void BSWAP32R( x86IntRegType to )
emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset)
{
LEA32( x86Register32( to ), ptr[x86IndexReg(from)+offset] );
LEA( x86Register32( to ), ptr[x86IndexReg(from)+offset] );
}
emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
{
LEA32( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
LEA( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
}
// Don't inline recursive functions
emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
{
LEA32( x86Register32( to ), ptr[x86IndexReg(from)*(1<<scale)] );
LEA( x86Register32( to ), ptr[x86IndexReg(from)*(1<<scale)] );
}
// to = from + offset
emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset)
{
LEA16( x86Register16( to ), ptr[x86IndexReg(from)+offset] );
LEA( x86Register16( to ), ptr[x86IndexReg(from)+offset] );
}
// to = from0 + from1
emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
{
LEA16( x86Register16( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
LEA( x86Register16( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] );
}
// to = from << scale (max is 3)
emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
{
LEA16( x86Register16( to ), ptr[x86IndexReg(from)*(1<<scale)] );
LEA( x86Register16( to ), ptr[x86IndexReg(from)*(1<<scale)] );
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,78 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#include "ix86_internal.h"
//------------------------------------------------------------------
// Legacy Helper Macros and Functions (deprecated)
//------------------------------------------------------------------
#include "ix86_legacy_types.h"
#include "ix86_legacy_instructions.h"
#define MEMADDR(addr, oplen) (addr)
#define Rex(w,r,x,b) assert(0)
#define RexR(w, reg) assert( !(w || (reg)>=8) )
#define RexB(w, base) assert( !(w || (base)>=8) )
#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) )
#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) )
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask))
extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
extern void ModRM( uint mod, uint reg, uint rm );
extern void SibSB( uint ss, uint index, uint base );
extern void SET8R( int cc, int to );
extern u8* J8Rel( int cc, int to );
extern u32* J32Rel( int cc, u32 to );
extern u64 GetCPUTick( void );
//////////////////////////////////////////////////////////////////////////////////////////
//
// Emits a ModRM byte into the emit stream: mod in bits [7:6], reg in [5:3], rm in [2:0].
emitterT void ModRM( uint mod, uint reg, uint rm )
{
	// Note: Following ASSUMEs are for legacy support only.
	// The new emitter performs these sanity checks during operand construction, so these
	// assertions can probably be removed once all legacy emitter code has been removed.
	jASSUME( mod < 4 );
	jASSUME( reg < 8 );
	jASSUME( rm < 8 );
	//write8( (mod << 6) | (reg << 3) | rm );

	// Stores a full 32 bits but advances x86Ptr by only one byte: the three extra
	// bytes are scratch that subsequent emits overwrite.  Presumably done as a
	// speed optimization over the byte-sized write8 (commented out above) -- the
	// emit buffer must therefore have at least 3 bytes of slack past the end.
	*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm;
	x86Ptr++;
}
// Emits a SIB byte into the emit stream: scale (ss) in bits [7:6], index in [5:3],
// base in [2:0].
emitterT void SibSB( uint ss, uint index, uint base )
{
	// Note: Following ASSUMEs are for legacy support only.
	// The new emitter performs these sanity checks during operand construction, so these
	// assertions can probably be removed once all legacy emitter code has been removed.
	jASSUME( ss < 4 );
	jASSUME( index < 8 );
	jASSUME( base < 8 );
	//write8( (ss << 6) | (index << 3) | base );

	// Stores a full 32 bits but advances x86Ptr by only one byte; the over-write
	// is scratch that later emits overwrite (same speed trick as ModRM above).
	*(u32*)x86Ptr = (ss << 6) | (index << 3) | base;
	x86Ptr++;
}

View File

@ -0,0 +1,140 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#define SIB 4 // maps to ESP
#define SIBDISP 5 // maps to EBP
#define DISP32 5 // maps to EBP
// general types
typedef int x86IntRegType;
#define EAX 0
#define EBX 3
#define ECX 1
#define EDX 2
#define ESI 6
#define EDI 7
#define EBP 5
#define ESP 4
#define X86ARG1 EAX
#define X86ARG2 ECX
#define X86ARG3 EDX
#define X86ARG4 EBX
#define MM0 0
#define MM1 1
#define MM2 2
#define MM3 3
#define MM4 4
#define MM5 5
#define MM6 6
#define MM7 7
typedef int x86MMXRegType;
#define XMM0 0
#define XMM1 1
#define XMM2 2
#define XMM3 3
#define XMM4 4
#define XMM5 5
#define XMM6 6
#define XMM7 7
#define XMM8 8
#define XMM9 9
#define XMM10 10
#define XMM11 11
#define XMM12 12
#define XMM13 13
#define XMM14 14
#define XMM15 15
typedef int x86SSERegType;
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#define SIB 4 // maps to ESP
#define SIBDISP 5 // maps to EBP
#define DISP32 5 // maps to EBP
// general types
typedef int x86IntRegType;
#define EAX 0
#define EBX 3
#define ECX 1
#define EDX 2
#define ESI 6
#define EDI 7
#define EBP 5
#define ESP 4
#define X86ARG1 EAX
#define X86ARG2 ECX
#define X86ARG3 EDX
#define X86ARG4 EBX
#define MM0 0
#define MM1 1
#define MM2 2
#define MM3 3
#define MM4 4
#define MM5 5
#define MM6 6
#define MM7 7
typedef int x86MMXRegType;
#define XMM0 0
#define XMM1 1
#define XMM2 2
#define XMM3 3
#define XMM4 4
#define XMM5 5
#define XMM6 6
#define XMM7 7
#define XMM8 8
#define XMM9 9
#define XMM10 10
#define XMM11 11
#define XMM12 12
#define XMM13 13
#define XMM14 14
#define XMM15 15
typedef int x86SSERegType;

View File

@ -17,7 +17,7 @@
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
#include "ix86_legacy_internal.h"
//------------------------------------------------------------------
// MMX instructions

View File

@ -17,7 +17,7 @@
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
#include "ix86_legacy_internal.h"
#include "ix86_sse_helpers.h"
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -19,7 +19,7 @@
#include "PrecompiledHeader.h"
#include "System.h"
#include "ix86/ix86.h"
#include "ix86.h"
// used to make sure regs don't get changed while in recompiler
// use FreezeMMXRegs, FreezeXMMRegs

View File

@ -23,57 +23,6 @@
#define X86REGS 8
#define MMXREGS 8
#define SIB 4
#define SIBDISP 5
#define DISP32 5
// general types
typedef int x86IntRegType;
#define EAX 0
#define EBX 3
#define ECX 1
#define EDX 2
#define ESI 6
#define EDI 7
#define EBP 5
#define ESP 4
#define X86ARG1 EAX
#define X86ARG2 ECX
#define X86ARG3 EDX
#define X86ARG4 EBX
#define MM0 0
#define MM1 1
#define MM2 2
#define MM3 3
#define MM4 4
#define MM5 5
#define MM6 6
#define MM7 7
typedef int x86MMXRegType;
#define XMM0 0
#define XMM1 1
#define XMM2 2
#define XMM3 3
#define XMM4 4
#define XMM5 5
#define XMM6 6
#define XMM7 7
#define XMM8 8
#define XMM9 9
#define XMM10 10
#define XMM11 11
#define XMM12 12
#define XMM13 13
#define XMM14 14
#define XMM15 15
typedef int x86SSERegType;
enum XMMSSEType
{
XMMT_INT = 0, // integer (sse2 only)
@ -149,104 +98,164 @@ struct CPUINFO{
};
extern CPUINFO cpuinfo;
//------------------------------------------------------------------
//------------------------------------------------------------------
#ifdef _MSC_VER
#define __threadlocal __declspec(thread)
#else
#define __threadlocal __thread
#endif
extern __threadlocal u8 *x86Ptr;
extern __threadlocal u8 *j8Ptr[32];
extern __threadlocal u32 *j32Ptr[32];
//------------------------------------------------------------------
// templated version of is_s8 is required, so that u16's get correct sign extension treatment.
template< typename T >
static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; }
template< typename T >
static __forceinline void iWrite( T val )
{
*(T*)x86Ptr = val;
x86Ptr += sizeof(T);
}
namespace x86Emitter
{
class x86ModRm;
//////////////////////////////////////////////////////////////////////////////////////////
//
struct x86Register32
/////////////////////////////////////////////////////////////////////////////////////////////
// __emitinline - preprocessor definition
//
// This is configured to inline emitter functions appropriately for release builds, and
// disables some of the more aggressive inlines for dev builds (which can be helpful when
// debugging).
//
// Note: I use __forceinline directly for most single-line class members, when needed.
// There's no point in using __emitinline in these cases since the debugger can't trace into
// single-line functions anyway.
//
#ifdef PCSX2_DEVBUILD
#define __emitinline
#else
#define __emitinline __forceinline
#endif
#ifdef _MSC_VER
# define __noinline __declspec(noinline)
#else
# define __noinline
#endif
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
class x86AddressInfo;
class ModSibBase;
static __forceinline void write8( u8 val )
{
static const x86Register32 Empty; // defined as an empty/unused value (-1)
int Id;
iWrite( val );
}
x86Register32( const x86Register32& src ) : Id( src.Id ) {}
x86Register32() : Id( -1 ) {}
explicit x86Register32( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
static __forceinline void write16( u16 val )
{
iWrite( val );
}
bool IsEmpty() const { return Id == -1; }
static __forceinline void write24( u32 val )
{
*(u32*)x86Ptr = val;
x86Ptr += 3;
}
bool operator==( const x86Register32& src ) const { return Id == src.Id; }
bool operator!=( const x86Register32& src ) const { return Id != src.Id; }
x86ModRm operator+( const x86Register32& right ) const;
x86ModRm operator+( const x86ModRm& right ) const;
x86ModRm operator+( s32 right ) const;
static __forceinline void write32( u32 val )
{
iWrite( val );
}
static __forceinline void write64( u64 val )
{
iWrite( val );
}
x86ModRm operator*( u32 factor ) const;
x86Register32& operator=( const x86Register32& src )
{
Id = src.Id;
return *this;
}
};
//////////////////////////////////////////////////////////////////////////////////////////
// Similar to x86Register, but without the ability to add/combine them with ModSib.
//
class x86Register16
template< int OperandSize >
class x86Register
{
public:
static const x86Register16 Empty;
static const x86Register Empty; // defined as an empty/unused value (-1)
int Id;
x86Register16( const x86Register16& src ) : Id( src.Id ) {}
x86Register16() : Id( -1 ) {}
explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
x86Register( const x86Register<OperandSize>& src ) : Id( src.Id ) {}
x86Register(): Id( -1 ) {}
explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id == -1; }
bool operator==( const x86Register16& src ) const { return Id == src.Id; }
bool operator!=( const x86Register16& src ) const { return Id != src.Id; }
// Returns true if the register is a valid accumulator: Eax, Ax, Al.
bool IsAccumulator() const { return Id == 0; }
x86Register16& operator=( const x86Register16& src )
bool operator==( const x86Register<OperandSize>& src ) const
{
return (Id == src.Id);
}
bool operator!=( const x86Register<OperandSize>& src ) const
{
return (Id != src.Id);
}
x86Register<OperandSize>& operator=( const x86Register<OperandSize>& src )
{
Id = src.Id;
return *this;
}
};
typedef x86Register<4> x86Register32;
typedef x86Register<2> x86Register16;
typedef x86Register<1> x86Register8;
//////////////////////////////////////////////////////////////////////////////////////////
// Similar to x86Register, but without the ability to add/combine them with ModSib.
//
class x86Register8
// Use 32 bit registers as our index register (for ModSib memory address calculations)
// Only x86IndexReg provides operators for constructing x86AddressInfo types.
class x86IndexReg : public x86Register32
{
public:
static const x86Register8 Empty;
static const x86IndexReg Empty; // defined as an empty/unused value (-1)
public:
x86IndexReg(): x86Register32() {}
x86IndexReg( const x86IndexReg& src ) : x86Register32( src.Id ) {}
x86IndexReg( const x86Register32& src ) : x86Register32( src ) {}
explicit x86IndexReg( int regId ) : x86Register32( regId ) {}
int Id;
// Returns true if the register is the stack pointer: ESP.
bool IsStackPointer() const { return Id == 4; }
x86Register8( const x86Register16& src ) : Id( src.Id ) {}
x86Register8() : Id( -1 ) {}
explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
x86AddressInfo operator+( const x86IndexReg& right ) const;
x86AddressInfo operator+( const x86AddressInfo& right ) const;
x86AddressInfo operator+( s32 right ) const;
bool IsEmpty() const { return Id == -1; }
bool operator==( const x86Register8& src ) const { return Id == src.Id; }
bool operator!=( const x86Register8& src ) const { return Id != src.Id; }
x86Register8& operator=( const x86Register8& src )
x86AddressInfo operator*( u32 factor ) const;
x86AddressInfo operator<<( u32 shift ) const;
x86IndexReg& operator=( const x86Register32& src )
{
Id = src.Id;
return *this;
}
};
// Use 32 bit registers as our index register (for ModSib memory address calculations)
typedef x86Register32 x86IndexReg;
//////////////////////////////////////////////////////////////////////////////////////////
//
class x86ModRm
class x86AddressInfo
{
public:
x86IndexReg Base; // base register (no scale)
@ -255,7 +264,7 @@ namespace x86Emitter
s32 Displacement; // address displacement
public:
x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) :
__forceinline x86AddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) :
Base( base ),
Index( index ),
Factor( factor ),
@ -263,7 +272,7 @@ namespace x86Emitter
{
}
explicit x86ModRm( x86IndexReg base, int displacement=0 ) :
__forceinline explicit x86AddressInfo( const x86IndexReg& base, int displacement=0 ) :
Base( base ),
Index(),
Factor(0),
@ -271,7 +280,7 @@ namespace x86Emitter
{
}
explicit x86ModRm( s32 displacement ) :
__forceinline explicit x86AddressInfo( s32 displacement ) :
Base(),
Index(),
Factor(0),
@ -279,62 +288,87 @@ namespace x86Emitter
{
}
static x86ModRm FromIndexReg( x86IndexReg index, int scale=0, s32 displacement=0 );
static x86AddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 );
public:
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
x86IndexReg GetEitherReg() const;
x86ModRm& Add( s32 imm )
__forceinline x86AddressInfo& Add( s32 imm )
{
Displacement += imm;
return *this;
}
x86ModRm& Add( const x86IndexReg& src );
x86ModRm& Add( const x86ModRm& src );
__forceinline x86AddressInfo& Add( const x86IndexReg& src );
__forceinline x86AddressInfo& Add( const x86AddressInfo& src );
x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); }
x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); }
x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); }
x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); }
__forceinline x86AddressInfo operator+( const x86IndexReg& right ) const { return x86AddressInfo( *this ).Add( right ); }
__forceinline x86AddressInfo operator+( const x86AddressInfo& right ) const { return x86AddressInfo( *this ).Add( right ); }
__forceinline x86AddressInfo operator+( s32 imm ) const { return x86AddressInfo( *this ).Add( imm ); }
__forceinline x86AddressInfo operator-( s32 imm ) const { return x86AddressInfo( *this ).Add( -imm ); }
};
//////////////////////////////////////////////////////////////////////////////////////////
// ModSib - Internal low-level representation of the ModRM/SIB information.
//
// This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that
// the Base, Index, Scale, and Displacement values are all valid, and it serves as a type-
// safe layer between the x86Register's operators (which generate x86ModRm types) and the
// emitter's ModSib instruction forms. Without this, the x86Register would pass as a
// ModSib type implicitly, and that would cause ambiguity on a number of instructions.
// This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means
// that the Base, Index, Scale, and Displacement values are all in the correct arrange-
// ments, and it serves as a type-safe layer between the x86Register's operators (which
// generate x86AddressInfo types) and the emitter's ModSib instruction forms. Without this,
// the x86Register would pass as a ModSib type implicitly, and that would cause ambiguity
// on a number of instructions.
//
class ModSib
// End users should always use x86AddressInfo instead.
//
class ModSibBase
{
public:
x86IndexReg Base; // base register (no scale)
x86IndexReg Index; // index reg gets multiplied by the scale
int Scale; // scale applied to the index register, in scale/shift form
uint Scale; // scale applied to the index register, in scale/shift form
s32 Displacement; // offset applied to the Base/Index registers.
explicit ModSib( const x86ModRm& src );
explicit ModSib( s32 disp );
ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 );
public:
explicit ModSibBase( const x86AddressInfo& src );
explicit ModSibBase( s32 disp );
ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 );
x86IndexReg GetEitherReg() const;
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
ModSib& Add( s32 imm )
__forceinline ModSibBase& Add( s32 imm )
{
Displacement += imm;
return *this;
}
ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); }
ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); }
__forceinline ModSibBase operator+( const s32 imm ) const { return ModSibBase( *this ).Add( imm ); }
__forceinline ModSibBase operator-( const s32 imm ) const { return ModSibBase( *this ).Add( -imm ); }
protected:
void Reduce();
__forceinline void Reduce();
};
//////////////////////////////////////////////////////////////////////////////////////////
// Strictly-typed version of ModSibBase, which is used to apply operand size information
// to ImmToMem operations.
//
// Strictly-typed (operand-size-tagged) form of ModSibBase, used by ptr32[]/ptr16[]/
// ptr8[] so that Imm-to-Mem instruction forms know the operand size at compile time.
template< int OperandSize >
class ModSibStrict : public ModSibBase
{
public:
	__forceinline explicit ModSibStrict( const x86AddressInfo& src ) : ModSibBase( src ) {}
	__forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {}
	__forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) :
		ModSibBase( base, index, scale, displacement ) {}

	// Shadows ModSibBase::Add so that displacement arithmetic preserves the
	// strict (size-tagged) type instead of decaying to ModSibBase.
	__forceinline ModSibStrict<OperandSize>& Add( s32 imm )
	{
		Displacement += imm;
		return *this;
	}

	__forceinline ModSibStrict<OperandSize> operator+( const s32 imm ) const { return ModSibStrict<OperandSize>( *this ).Add( imm ); }
	__forceinline ModSibStrict<OperandSize> operator-( const s32 imm ) const { return ModSibStrict<OperandSize>( *this ).Add( -imm ); }
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -344,33 +378,390 @@ namespace x86Emitter
{
// passthrough instruction, allows ModSib to pass silently through ptr translation
// without doing anything and without compiler error.
const ModSib& operator[]( const ModSib& src ) const { return src; }
const ModSibBase& operator[]( const ModSibBase& src ) const { return src; }
ModSib operator[]( x86IndexReg src ) const
__forceinline ModSibBase operator[]( x86IndexReg src ) const
{
return ModSib( src, x86IndexReg::Empty );
return ModSibBase( src, x86IndexReg::Empty );
}
ModSib operator[]( const x86ModRm& src ) const
__forceinline ModSibBase operator[]( const x86AddressInfo& src ) const
{
return ModSib( src );
return ModSibBase( src );
}
ModSib operator[]( uptr src ) const
__forceinline ModSibBase operator[]( uptr src ) const
{
return ModSib( src );
return ModSibBase( src );
}
ModSib operator[]( void* src ) const
__forceinline ModSibBase operator[]( const void* src ) const
{
return ModSib( (uptr)src );
return ModSibBase( (uptr)src );
}
x86IndexerType() {}
};
// ------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////////////////
// Explicit version of ptr[], in the form of ptr32[], ptr16[], etc. which allows
// specification of the operand size for ImmToMem operations.
//
// Explicit-operand-size indexer, instantiated as the ptr32[], ptr16[], and ptr8[]
// globals.  Each operator[] converts its argument into a size-tagged ModSibStrict,
// which lets Imm-to-Mem instruction forms deduce the operand size.
template< int OperandSize >
struct x86IndexerTypeExplicit
{
	// passthrough instruction, allows ModSib to pass silently through ptr translation
	// without doing anything and without compiler error.
	const ModSibStrict<OperandSize>& operator[]( const ModSibStrict<OperandSize>& src ) const { return src; }

	// [reg] -- single base register, no index/displacement.
	__forceinline ModSibStrict<OperandSize> operator[]( x86IndexReg src ) const
	{
		return ModSibStrict<OperandSize>( src, x86IndexReg::Empty );
	}

	// [base + index*scale + disp] -- full address expression.
	__forceinline ModSibStrict<OperandSize> operator[]( const x86AddressInfo& src ) const
	{
		return ModSibStrict<OperandSize>( src );
	}

	// [absolute address] -- direct displacement forms.
	__forceinline ModSibStrict<OperandSize> operator[]( uptr src ) const
	{
		return ModSibStrict<OperandSize>( src );
	}

	__forceinline ModSibStrict<OperandSize> operator[]( const void* src ) const
	{
		return ModSibStrict<OperandSize>( (uptr)src );
	}
};
extern const x86IndexerType ptr;
extern const x86IndexerTypeExplicit<4> ptr32;
extern const x86IndexerTypeExplicit<2> ptr16;
extern const x86IndexerTypeExplicit<1> ptr8;
//////////////////////////////////////////////////////////////////////////////////////////
//
namespace Internal
{
extern void ModRM( uint mod, uint reg, uint rm );
extern void SibSB( u32 ss, u32 index, u32 base );
extern void EmitSibMagic( uint regfield, const ModSibBase& info );
// Policy type: forwards ModRM/SIB emission through a normal (out-of-line) call
// to EmitSibMagic.  Used as the SibMagicType parameter for instruction forms
// where inlining the emission logic would bloat code for no benefit.
struct SibMagic
{
	static void Emit( uint regfield, const ModSibBase& info )
	{
		EmitSibMagic( regfield, info );
	}
};
// Policy type: force-inlined counterpart of SibMagic.  Used for instruction
// forms (such as the (void*) overloads) where constant propagation collapses
// EmitSibMagic to a few instructions, making inlining worthwhile.
struct SibMagicInline
{
	static __forceinline void Emit( uint regfield, const ModSibBase& info )
	{
		EmitSibMagic( regfield, info );
	}
};
// Group 1 (arithmetic/logic) instruction selector.  The enum value serves both
// as the ModRM reg-field opcode extension for the 0x80/0x81/0x83 Imm forms, and
// as the opcode-row selector (value << 3) for the Reg/Mem forms -- see Group1Impl.
enum G1Type
{
	G1Type_ADD=0,
	G1Type_OR,
	G1Type_ADC,
	G1Type_SBB,
	G1Type_AND,
	G1Type_SUB,
	G1Type_XOR,
	G1Type_CMP
};
// Group 2 (shift/rotate) instruction selector.  The enum value is the ModRM
// reg-field opcode extension used with the 0xC0/0xC1/0xD0-0xD3 encodings -- see
// Group2Impl.  Slot 6 is not used by this emitter (hence G2Type_Unused).
enum G2Type
{
	G2Type_ROL=0,
	G2Type_ROR,
	G2Type_RCL,
	G2Type_RCR,
	G2Type_SHL,
	G2Type_SHR,
	G2Type_Unused,
	G2Type_SAR
};
// -------------------------------------------------------------------
// Encoder for the x86 "Group 1" arithmetic/logic instructions (ADD/OR/ADC/SBB/
// AND/SUB/XOR/CMP), selected by InstType.  ImmType fixes the operand size
// (u8/u16/u32); SibMagicType selects out-of-line vs. force-inlined ModRM/SIB
// emission (SibMagic / SibMagicInline).
template< typename ImmType, G1Type InstType, typename SibMagicType >
class Group1Impl
{
public:
	static const uint OperandSize = sizeof(ImmType);

protected:
	static bool Is8BitOperand() { return OperandSize == 1; }
	// 16-bit operand forms require the 0x66 operand-size prefix.
	static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }

public:
	// Reg,Reg form.  Base opcode row is (InstType<<3); +1 selects the 16/32-bit variant.
	static __emitinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
	{
		prefix16();
		iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
		ModRM( 3, from.Id, to.Id );		// mod=3 : register-direct addressing
	}

	// Mem,Reg form (memory destination).
	static __emitinline void Emit( const ModSibBase& sibdest, const x86Register<OperandSize>& from )
	{
		prefix16();
		iWrite<u8>( (Is8BitOperand() ? 0 : 1) | (InstType<<3) );
		SibMagicType::Emit( from.Id, sibdest );
	}

	// Reg,Mem form (memory source): columns 2/3 of the opcode row.
	static __emitinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& sibsrc )
	{
		prefix16();
		iWrite<u8>( (Is8BitOperand() ? 2 : 3) | (InstType<<3) );
		SibMagicType::Emit( to.Id, sibsrc );
	}

	// Reg,Imm form.
	static __emitinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
	{
		if( !Is8BitOperand() && is_s8( imm ) )
		{
			// 0x83: imm8 sign-extended to full operand size -- shortest encoding.
			iWrite<u8>( 0x83 );
			ModRM( 3, InstType, to.Id );	// reg field carries the group-1 opcode extension
			iWrite<s8>( imm );
		}
		else
		{
			prefix16();
			if( to.IsAccumulator() )
				iWrite<u8>( (Is8BitOperand() ? 4 : 5) | (InstType<<3) );	// short AL/AX/EAX form (no ModRM)
			else
			{
				iWrite<u8>( Is8BitOperand() ? 0x80 : 0x81 );
				ModRM( 3, InstType, to.Id );
			}
			iWrite<ImmType>( imm );
		}
	}

	// Mem,Imm form; operand size is carried by the strictly-typed ModSib.
	static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, ImmType imm )
	{
		if( Is8BitOperand() )
		{
			iWrite<u8>( 0x80 );
			SibMagicType::Emit( InstType, sibdest );
			iWrite<ImmType>( imm );
		}
		else
		{
			prefix16();
			// 0x83 (sign-extended imm8) when the immediate fits; else 0x81 with full imm.
			iWrite<u8>( is_s8( imm ) ? 0x83 : 0x81 );
			SibMagicType::Emit( InstType, sibdest );
			if( is_s8( imm ) )
				iWrite<s8>( imm );
			else
				iWrite<ImmType>( imm );
		}
	}
};
// -------------------------------------------------------------------
// Group 2 (shift) instructions have no Sib/ModRM forms.
// Note: For Imm forms, we ignore the instruction if the shift count is zero. This
// is a safe optimization since any zero-value shift does not affect any flags.
//
// Encoder for the x86 "Group 2" shift/rotate instructions (ROL/ROR/RCL/RCR/
// SHL/SHR/SAR), selected by InstType.  Immediate forms with a shift count of
// zero emit nothing -- a safe optimization since a zero-count shift does not
// affect any flags (per the note above this class).
template< typename ImmType, G2Type InstType, typename SibMagicType >
class Group2Impl
{
public:
	static const uint OperandSize = sizeof(ImmType);

protected:
	static bool Is8BitOperand() { return OperandSize == 1; }
	// 16-bit operand forms require the 0x66 operand-size prefix.
	static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }

public:
	// Reg,CL form (0xD2/0xD3): shift count taken from CL.
	static __emitinline void Emit( const x86Register<OperandSize>& to, const x86Register8& from )
	{
		jASSUME( from == cl );	// cl is the only valid shift register. (turn this into a compile time check?)

		prefix16();
		iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
		ModRM( 3, InstType, to.Id );	// reg field carries the group-2 opcode extension
	}

	// Reg,Imm form.  Zero counts are dropped entirely (see class note).
	static __emitinline void Emit( const x86Register<OperandSize>& to, u8 imm )
	{
		if( imm == 0 ) return;

		prefix16();
		if( imm == 1 )
		{
			// special encoding of 1's (0xD0/0xD1: shift-by-one, no imm byte)
			iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
			ModRM( 3, InstType, to.Id );
		}
		else
		{
			iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
			ModRM( 3, InstType, to.Id );
			iWrite<u8>( imm );
		}
	}

	// Mem,CL form.
	static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, const x86Register8& from )
	{
		jASSUME( from == cl );	// cl is the only valid shift register. (turn this into a compile time check?)

		prefix16();
		iWrite<u8>( Is8BitOperand() ? 0xd2 : 0xd3 );
		SibMagicType::Emit( from.Id, sibdest );
	}

	// Mem,Imm form.  Zero counts are dropped entirely (see class note).
	static __emitinline void Emit( const ModSibStrict<OperandSize>& sibdest, u8 imm )
	{
		if( imm == 0 ) return;

		prefix16();
		if( imm == 1 )
		{
			// special encoding of 1's (0xD0/0xD1: shift-by-one, no imm byte)
			iWrite<u8>( Is8BitOperand() ? 0xd0 : 0xd1 );
			SibMagicType::Emit( InstType, sibdest );
		}
		else
		{
			iWrite<u8>( Is8BitOperand() ? 0xc0 : 0xc1 );
			SibMagicType::Emit( InstType, sibdest );
			iWrite<u8>( imm );
		}
	}
};
// -------------------------------------------------------------------
//
// User-facing front end for the Group 1 instructions (ADD, OR, ADC, SBB, AND,
// SUB, XOR, CMP).  Operand size is selected by overload resolution; each form
// forwards to either the force-inlined or the out-of-line encoder, depending on
// how well constant propagation can collapse the ModRM/SIB emission for it.
template< G1Type InstType >
class Group1ImplAll
{
protected:
	// Out-of-line encoders -- ModRM/SIB emission goes through a function call:
	typedef Group1Impl<u32, InstType, SibMagic>  Impl32;
	typedef Group1Impl<u16, InstType, SibMagic>  Impl16;
	typedef Group1Impl<u8,  InstType, SibMagic>  Impl8;

	// Force-inlined encoders -- used where constprop reduces the emission to
	// nearly nothing, such as the (void*) direct-address forms below:
	typedef Group1Impl<u32, InstType, SibMagicInline>  Impl32i;
	typedef Group1Impl<u16, InstType, SibMagicInline>  Impl16i;
	typedef Group1Impl<u8,  InstType, SibMagicInline>  Impl8i;

	// (Reg,Imm) forms carry no inlining directive at all, leaving the decision
	// to the compiler's discretion.  Not macro'd, to keep intellisense parameter
	// resolution intact.

public:
	// ---------- 32 Bit Interface -----------
	__forceinline void operator()( const x86Register32& to, const x86Register32& from ) const	{ Impl32i::Emit( to, from ); }
	__forceinline void operator()( const x86Register32& to, const void* src ) const				{ Impl32i::Emit( to, ptr32[src] ); }
	__forceinline void operator()( const void* dest, const x86Register32& from ) const			{ Impl32i::Emit( ptr32[dest], from ); }
	__noinline void operator()( const ModSibBase& sibdest, const x86Register32& from ) const	{ Impl32::Emit( sibdest, from ); }
	__noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const		{ Impl32::Emit( to, sibsrc ); }
	__noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const					{ Impl32::Emit( sibdest, imm ); }

	void operator()( const x86Register32& to, u32 imm ) const									{ Impl32i::Emit( to, imm ); }

	// ---------- 16 Bit Interface -----------
	__forceinline void operator()( const x86Register16& to, const x86Register16& from ) const	{ Impl16i::Emit( to, from ); }
	__forceinline void operator()( const x86Register16& to, const void* src ) const				{ Impl16i::Emit( to, ptr16[src] ); }
	__forceinline void operator()( const void* dest, const x86Register16& from ) const			{ Impl16i::Emit( ptr16[dest], from ); }
	__noinline void operator()( const ModSibBase& sibdest, const x86Register16& from ) const	{ Impl16::Emit( sibdest, from ); }
	__noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const		{ Impl16::Emit( to, sibsrc ); }
	__noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const					{ Impl16::Emit( sibdest, imm ); }

	void operator()( const x86Register16& to, u16 imm ) const									{ Impl16i::Emit( to, imm ); }

	// ---------- 8 Bit Interface -----------
	__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const		{ Impl8i::Emit( to, from ); }
	__forceinline void operator()( const x86Register8& to, const void* src ) const				{ Impl8i::Emit( to, ptr8[src] ); }
	__forceinline void operator()( const void* dest, const x86Register8& from ) const			{ Impl8i::Emit( ptr8[dest], from ); }
	__noinline void operator()( const ModSibBase& sibdest, const x86Register8& from ) const		{ Impl8::Emit( sibdest, from ); }
	__noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const		{ Impl8::Emit( to, sibsrc ); }
	__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const					{ Impl8::Emit( sibdest, imm ); }

	void operator()( const x86Register8& to, u8 imm ) const										{ Impl8i::Emit( to, imm ); }
};
// -------------------------------------------------------------------
//
// User-facing front end for the Group 2 shift/rotate instructions (ROL, ROR,
// RCL, RCR, SHL, SHR, SAR).  Operand size is selected by overload resolution;
// each form forwards to the inlined or out-of-line encoder depending on how
// much of the emission constant propagation can collapse.
template< G2Type InstType >
class Group2ImplAll
{
protected:
	// Out-of-line encoders -- ModRM/SIB emission goes through a function call:
	typedef Group2Impl<u32, InstType, SibMagic>  Impl32;
	typedef Group2Impl<u16, InstType, SibMagic>  Impl16;
	typedef Group2Impl<u8,  InstType, SibMagic>  Impl8;

	// Force-inlined encoders, for forms where constprop collapses the emission:
	typedef Group2Impl<u32, InstType, SibMagicInline>  Impl32i;
	typedef Group2Impl<u16, InstType, SibMagicInline>  Impl16i;
	typedef Group2Impl<u8,  InstType, SibMagicInline>  Impl8i;

	// (Reg,Imm) forms carry no inlining directive, leaving the decision to the
	// compiler.  Not macro'd, to keep intellisense parameter resolution intact.

public:
	// ---------- 32 Bit Interface -----------
	__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const	{ Impl32i::Emit( to, from ); }
	__noinline void operator()( const ModSibStrict<4>& sibdest, const x86Register8& from ) const{ Impl32::Emit( sibdest, from ); }
	__noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const					{ Impl32::Emit( sibdest, imm ); }

	void operator()( const x86Register32& to, u8 imm ) const									{ Impl32i::Emit( to, imm ); }

	// ---------- 16 Bit Interface -----------
	__forceinline void operator()( const x86Register16& to, const x86Register8& from ) const	{ Impl16i::Emit( to, from ); }
	__noinline void operator()( const ModSibStrict<2>& sibdest, const x86Register8& from ) const{ Impl16::Emit( sibdest, from ); }
	__noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const					{ Impl16::Emit( sibdest, imm ); }

	void operator()( const x86Register16& to, u8 imm ) const									{ Impl16i::Emit( to, imm ); }

	// ---------- 8 Bit Interface -----------
	__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const		{ Impl8i::Emit( to, from ); }
	__noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ Impl8::Emit( sibdest, from ); }
	__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const					{ Impl8::Emit( sibdest, imm ); }

	void operator()( const x86Register8& to, u8 imm ) const										{ Impl8i::Emit( to, imm ); }
};
// Define the externals for Group1/2 instructions here (inside the Internal namespace).
// and then import them into the x86Emitter namespace later. Done because it saves a
// lot of Internal:: namespace resolution mess, and is better than the alternative of
// importing Internal into x86Emitter, which done at the header file level would defeat
// the purpose!)
extern const Group1ImplAll<G1Type_ADD> ADD;
extern const Group1ImplAll<G1Type_OR> OR;
extern const Group1ImplAll<G1Type_ADC> ADC;
extern const Group1ImplAll<G1Type_SBB> SBB;
extern const Group1ImplAll<G1Type_AND> AND;
extern const Group1ImplAll<G1Type_SUB> SUB;
extern const Group1ImplAll<G1Type_XOR> XOR;
extern const Group1ImplAll<G1Type_CMP> CMP;
extern const Group2ImplAll<G2Type_ROL> ROL;
extern const Group2ImplAll<G2Type_ROR> ROR;
extern const Group2ImplAll<G2Type_RCL> RCL;
extern const Group2ImplAll<G2Type_RCR> RCR;
extern const Group2ImplAll<G2Type_SHL> SHL;
extern const Group2ImplAll<G2Type_SHR> SHR;
extern const Group2ImplAll<G2Type_SAR> SAR;
}
// ------------------------------------------------------------------------
extern const x86Register32 eax;
extern const x86Register32 ebx;
@ -398,4 +789,6 @@ namespace x86Emitter
extern const x86Register8 ch;
extern const x86Register8 dh;
extern const x86Register8 bh;
}
}
#include "ix86_inlines.inl"