From 6cceed6268e30debbfe1f73e01e2a5ec66d71998 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 14 Apr 2009 01:26:57 +0000 Subject: [PATCH] Many Emitter updates: * added implementations for MOV and Shift instructions (SHL, SHR, ROL, ROR, etc). * Improved compilation optimization considerably, by improving inlining selection in cases where constant propagation can be resolved reliably. * Moved lots of code around, so that the new emitter and the legacy emitter are more clearly separated; and renamed some vars. * Changed recompilers to initialize the recBlocks array to 0xcc instead of 0xcd (fills the blocks with the single-byte instruction INT3, which fixes the misalignment mess that would sometimes happen when using disasm views on the RecBlocks contents). * Switched back to /O2 (Optimize for Speed) instead of /Ox, since MSVC (for me) generally fails to optimize Thread-Local storage in /Ox mode. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@971 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Memory.cpp | 4 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 23 +- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/iVUmicroLower.cpp | 21 +- pcsx2/x86/iVUzerorec.cpp | 8 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 2 +- pcsx2/x86/ix86-32/recVTLB.cpp | 26 +- pcsx2/x86/ix86/ix86.cpp | 871 ++++++----- pcsx2/x86/ix86/ix86.h | 1541 +------------------- pcsx2/x86/ix86/ix86_3dnow.cpp | 2 +- pcsx2/x86/ix86/ix86_cpudetect.cpp | 3 +- pcsx2/x86/ix86/ix86_fpu.cpp | 2 +- pcsx2/x86/ix86/ix86_inlines.inl | 237 +++ pcsx2/x86/ix86/ix86_instructions.h | 107 ++ pcsx2/x86/ix86/ix86_internal.h | 59 +- pcsx2/x86/ix86/ix86_legacy.cpp | 692 ++------- pcsx2/x86/ix86/ix86_legacy_instructions.h | 1422 ++++++++++++++++++ pcsx2/x86/ix86/ix86_legacy_internal.h | 78 + pcsx2/x86/ix86/ix86_legacy_types.h | 140 ++ pcsx2/x86/ix86/ix86_mmx.cpp | 2 +- pcsx2/x86/ix86/ix86_sse.cpp | 2 +- pcsx2/x86/ix86/ix86_tools.cpp | 2 +- pcsx2/x86/ix86/ix86_types.h | 679 +++++++-- 23 files changed, 3286 insertions(+), 2639 deletions(-) 
create mode 100644 pcsx2/x86/ix86/ix86_inlines.inl create mode 100644 pcsx2/x86/ix86/ix86_instructions.h create mode 100644 pcsx2/x86/ix86/ix86_legacy_instructions.h create mode 100644 pcsx2/x86/ix86/ix86_legacy_internal.h create mode 100644 pcsx2/x86/ix86/ix86_legacy_types.h diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index b30a545335..8d993e7b70 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -517,8 +517,8 @@ void __fastcall vuMicroRead128(u32 addr,mem128_t* data) data[1]=*(u64*)&vu->Micro[addr+8]; } -// [TODO] : Profile this code and see how often the VUs get written, and how -// often it changes the values being written (invoking a cpuClear). +// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per +// frame in-game, and usually none at all after BIOS), so cpu clears aren't much of a big deal. template void __fastcall vuMicroWrite8(u32 addr,mem8_t data) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index ba2df10aca..6ed073af1e 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -217,7 +217,6 @@ /> + + + + + + + + @@ -2959,6 +2974,10 @@ > + + diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 38e2021609..66a78c0105 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -538,7 +538,7 @@ void recResetIOP() DevCon::Status( "iR3000A Resetting recompiler memory and structures" ); - memset_8<0xcd,RECMEM_SIZE>( recMem ); + memset_8<0xcc,RECMEM_SIZE>( recMem ); // 0xcc is INT3 iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc, (((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4))); diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 9f9d816cf7..0c2ab15768 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -799,17 +799,22 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) else 
MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); } else { + + // (this is one of my test cases for the new emitter --air) + using namespace x86Emitter; + if ( x86reg >= 0 ) { - if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset); - if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4); - if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8); - if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12); + x86IndexReg thisreg( x86reg ); + if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000 ); + if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000 ); + if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000 ); + if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000); } else { - if ( _X ) MOV32ItoM(offset, 0x00000000); - if ( _Y ) MOV32ItoM(offset+4, 0x00000000); - if ( _Z ) MOV32ItoM(offset+8, 0x00000000); - if ( _W ) MOV32ItoM(offset+12, 0x3f800000); + if ( _X ) MOV(ptr32[offset], 0x00000000); + if ( _Y ) MOV(ptr32[offset+4], 0x00000000); + if ( _Z ) MOV(ptr32[offset+8], 0x00000000); + if ( _W ) MOV(ptr32[offset+12], 0x3f800000); // W is the 4th 32-bit lane: byte offset 12, same as the register-indexed path } } return; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 0f454e638e..bebcd3cdf3 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -3569,7 +3569,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1; if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) { @@ -3578,7 +3578,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1; x86SetJ8( j8Ptr[ 1 ] ); @@ -3815,7 +3815,7 @@ void recVUMI_B( VURegs* vuu, s32 info ) if( 
s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } @@ -3841,7 +3841,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 2dd76b7778..a95147c1b4 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -531,7 +531,7 @@ void recResetEE( void ) maxrecmem = 0; - memset_8<0xcd, REC_CACHEMEM>(recMem); + memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3 memzero_ptr( m_recBlockAlloc ); ClearRecLUT((BASEBLOCK*)m_recBlockAlloc, (((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4))); diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index dd45f51d82..564a636c3e 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -31,14 +31,24 @@ using namespace vtlb_private; // (used as an equivalent to movaps, when a free XMM register is unavailable for some reason) void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { - MOV32RmtoR(EAX,srcRm); - MOV32RtoRm(destRm,EAX); - MOV32RmtoR(EAX,srcRm,4); - MOV32RtoRm(destRm,EAX,4); - MOV32RmtoR(EAX,srcRm,8); - MOV32RtoRm(destRm,EAX,8); - MOV32RmtoR(EAX,srcRm,12); - MOV32RtoRm(destRm,EAX,12); + // (this is one of my test cases for the new emitter --air) + + using namespace x86Emitter; + + x86IndexReg src( srcRm ); + x86IndexReg dest( destRm ); + + MOV( eax, ptr[src] ); + MOV( ptr[dest], eax ); + + MOV( eax, ptr[src+4] ); + MOV( ptr[dest+4], eax ); + + MOV( eax, ptr[src+8] ); + MOV( ptr[dest+8], 
eax ); + + MOV( eax, ptr[src+12] ); + MOV( ptr[dest+12], eax ); } /* diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index f62cabf789..b3de07a04a 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -15,13 +15,20 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + /* - * ix86 core v0.6.2 - * Authors: linuzappz - * alexey silinov - * goldfinger - * zerofrog(@gmail.com) - * cottonvibes(@gmail.com) + * ix86 core v0.9.0 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) */ #include "PrecompiledHeader.h" @@ -29,310 +36,363 @@ #include "System.h" #include "ix86_internal.h" +// ------------------------------------------------------------------------ +// Notes on Thread Local Storage: +// * TLS is pretty simple, and "just works" from a programmer perspective, with only +// some minor additional computational overhead (see performance notes below). +// +// * MSVC and GCC handle TLS differently internally, but behavior to the programmer is +// generally identical. +// +// Performance Considerations: +// * GCC's implementation involves an extra dereference from normal storage. +// +// * MSVC's implementation involves *two* extra dereferences from normal storage because +// it has to look up the TLS heap pointer from the Windows Thread Storage Area. (in +// generated ASM code, this dereference is denoted by access to the fs:[2ch] address). +// +// * However, in either case, the optimizer usually optimizes it to a register so the +// extra overhead is minimal over a series of instructions. (Note!! the Full Opt- +// imization [/Ox] option effectively disables TLS optimizations in MSVC, causing +// generally significant code bloat). 
+// + + __threadlocal u8 *x86Ptr; __threadlocal u8 *j8Ptr[32]; __threadlocal u32 *j32Ptr[32]; -PCSX2_ALIGNED16(u32 p[4]); -PCSX2_ALIGNED16(u32 p2[4]); -PCSX2_ALIGNED16(float f[4]); - XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { const x86IndexerType ptr; - -////////////////////////////////////////////////////////////////////////////////////////// -// -const x86Register32 x86Register32::Empty( -1 ); - -const x86Register32 eax( 0 ); -const x86Register32 ebx( 3 ); -const x86Register32 ecx( 1 ); -const x86Register32 edx( 2 ); -const x86Register32 esi( 6 ); -const x86Register32 edi( 7 ); -const x86Register32 ebp( 5 ); -const x86Register32 esp( 4 ); - -const x86Register16 ax( 0 ); -const x86Register16 bx( 3 ); -const x86Register16 cx( 1 ); -const x86Register16 dx( 2 ); -const x86Register16 si( 6 ); -const x86Register16 di( 7 ); -const x86Register16 bp( 5 ); -const x86Register16 sp( 4 ); - -const x86Register8 al( 0 ); -const x86Register8 cl( 1 ); -const x86Register8 dl( 2 ); -const x86Register8 bl( 3 ); -const x86Register8 ah( 4 ); -const x86Register8 ch( 5 ); -const x86Register8 dh( 6 ); -const x86Register8 bh( 7 ); - -////////////////////////////////////////////////////////////////////////////////////////// -// x86Register Method Implementations -// -x86ModRm x86Register32::operator+( const x86Register32& right ) const -{ - return x86ModRm( *this, right ); -} - -x86ModRm x86Register32::operator+( const x86ModRm& right ) const -{ - return right + *this; -} - -x86ModRm x86Register32::operator+( s32 right ) const -{ - return x86ModRm( *this, right ); -} - -x86ModRm x86Register32::operator*( u32 right ) const -{ - return x86ModRm( Empty, *this, right ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// x86ModRm Method Implementations -// -x86ModRm& x86ModRm::Add( const x86IndexReg& src ) -{ - if( src == Index ) - { - Factor++; - } - else if( src == Base ) - { - // Compound the existing register 
reference into the Index/Scale pair. - Base = x86IndexReg::Empty; - - if( src == Index ) - Factor++; - else - { - jASSUME( Index.IsEmpty() ); // or die if we already have an index! - Index = src; - Factor = 2; - } - } - else if( Base.IsEmpty() ) - Base = src; - else if( Index.IsEmpty() ) - Index = src; - else - assert( false ); // oops, only 2 regs allowed per ModRm! - - return *this; -} - -x86ModRm& x86ModRm::Add( const x86ModRm& src ) -{ - Add( src.Base ); - Add( src.Displacement ); - - // If the factor is 1, we can just treat index like a base register also. - if( src.Factor == 1 ) - { - Add( src.Index ); - } - else if( Index.IsEmpty() ) - { - Index = src.Index; - Factor = 1; - } - else if( Index == src.Index ) - Factor++; - else - assert( false ); // oops, only 2 regs allowed! - - return *this; -} - -////////////////////////////////////////////////////////////////////////////////////////// -// ModSib Method Implementations -// +const x86IndexerTypeExplicit<4> ptr32; +const x86IndexerTypeExplicit<2> ptr16; +const x86IndexerTypeExplicit<1> ptr8; // ------------------------------------------------------------------------ -// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. -// Necessary because by default ModSib compounds registers into Index when possible. -// -void ModSib::Reduce() +const x86Register32 x86Register32::Empty; +const x86Register16 x86Register16::Empty; +const x86Register8 x86Register8::Empty; +const x86IndexReg x86IndexReg::Empty; + +const x86Register32 + eax( 0 ), ebx( 3 ), + ecx( 1 ), edx( 2 ), + esi( 6 ), edi( 7 ), + ebp( 5 ), esp( 4 ); + +const x86Register16 + ax( 0 ), bx( 3 ), + cx( 1 ), dx( 2 ), + si( 6 ), di( 7 ), + bp( 5 ), sp( 4 ); + +const x86Register8 + al( 0 ), cl( 1 ), + dl( 2 ), bl( 3 ), + ah( 4 ), ch( 5 ), + dh( 6 ), bh( 7 ); + +namespace Internal { - // If no index reg, then load the base register into the index slot. 
- if( Index.IsEmpty() ) + const Group1ImplAll ADD; + const Group1ImplAll OR; + const Group1ImplAll ADC; + const Group1ImplAll SBB; + const Group1ImplAll AND; + const Group1ImplAll SUB; + const Group1ImplAll XOR; + const Group1ImplAll CMP; + + const Group2ImplAll ROL; + const Group2ImplAll ROR; + const Group2ImplAll RCL; + const Group2ImplAll RCR; + const Group2ImplAll SHL; + const Group2ImplAll SHR; + const Group2ImplAll SAR; + + // Performance note: VC++ wants to use byte/word register form for the following + // ModRM/SibSB constructors if we use iWrite, and furthermore unrolls the + // the shift using a series of ADDs for the following results: + // add cl,cl + // add cl,cl + // add cl,cl + // or cl,bl + // add cl,cl + // ... etc. + // + // This is unquestionably bad optimization by Core2 standard, an generates tons of + // register aliases and false dependencies. (although may have been ideal for early- + // brand P4s with a broken barrel shifter?). The workaround is to do our own manual + // x86Ptr access and update using a u32 instead of u8. Thanks to little endianness, + // the same end result is achieved and no false dependencies are generated. + // + // (btw, I know this isn't a critical performance item by any means, but it's + // annoying simply because it *should* be an easy thing to optimize) + + __forceinline void ModRM( uint mod, uint reg, uint rm ) + { + *(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; + x86Ptr++; + } + + __forceinline void SibSB( u32 ss, u32 index, u32 base ) + { + *(u32*)x86Ptr = (ss << 6) | (index << 3) | base; + x86Ptr++; + } + + // ------------------------------------------------------------------------ + // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the + // instruction ca be encoded as ModRm alone. + static __forceinline bool NeedsSibMagic( const ModSibBase& info ) + { + // If base register is ESP, then we need a SIB: + if( info.Base.IsStackPointer() ) return true; + + // no registers? no sibs! 
+ // (ModSibBase::Reduce + if( info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() ) return true; + + return false; + } + + ////////////////////////////////////////////////////////////////////////////////////////// + // Conditionally generates Sib encoding information! + // + // regfield - register field to be written to the ModRm. This is either a register specifier + // or an opcode extension. In either case, the instruction determines the value for us. + // + __forceinline void EmitSibMagic( uint regfield, const ModSibBase& info ) + { + jASSUME( regfield < 8 ); + + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + if( info.Index.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseDisp32 ); + iWrite( info.Displacement ); + return; + } + else + { + if( info.Index == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, info.Index.Id ); + } + } + else + { + // In order to encode "just" index*scale (and no base), we have to encode + // it as a special [index*scale + displacement] form, which is done by + // specifying EBP as the base register and setting the displacement field + // to zero. (same as ModRm w/o SIB form above, basically, except the + // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). 
+ + if( info.Base.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); + iWrite( info.Displacement ); + return; + } + else + { + if( info.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); + } + } + + if( displacement_size != 0 ) + { + *(u32*)x86Ptr = info.Displacement; + x86Ptr += (displacement_size == 1) ? 1 : 4; + } + } +} + +using namespace Internal; + +/* +emitterT void x86SetPtr( u8* ptr ) +{ + x86Ptr = ptr; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// x86Ptr Label API +// + +class x86Label +{ +public: + class Entry + { + protected: + u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type) + u8* m_base; // base address of the instruction (passed to the instruction) + int m_cc; // comparison type of the instruction + + public: + explicit Entry( int cc ) : + m_base( x86Ptr ) + , m_writebackpos( writebackidx ) + { + } + + void Commit( const u8* target ) const + { + //uptr reltarget = (uptr)m_base - (uptr)target; + //*((u32*)&m_base[m_writebackpos]) = reltarget; + jASSUME( m_emit != NULL ); + jASSUME( m_base != NULL ); + return m_emit( m_base, target, m_cc ); + } + }; + +protected: + u8* m_target; // x86Ptr target address of this label + Entry m_writebacks[8]; + int m_writeback_curpos; + +public: + // creates a label list with no valid target. + // Use x86LabelList::Set() to set a target prior to class destruction. + x86Label() : m_target() + { + } + + x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() ) { - Index = Base; - Scale = 0; - Base = x86IndexReg::Empty; - return; } - // The Scale has a series of valid forms, all shown here: + // Performs all address writebacks on destruction. 
+ virtual ~x86Label() + { + IssueWritebacks(); + } + + void SetTarget() { m_address = x86Ptr; } + void SetTarget( void* addr ) { m_address = (u8*)addr; } + + void Clear() + { + m_writeback_curpos = 0; + } - switch( Scale ) + // Adds a jump/call instruction to this label for writebacks. + void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc ) { - case 0: break; - case 1: Scale = 0; break; - case 2: Scale = 1; break; - - case 3: // becomes [reg*2+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 1; - break; - - case 4: Scale = 2; break; - - case 5: // becomes [reg*4+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 2; - break; - - case 6: // invalid! - assert( false ); - break; - - case 7: // so invalid! - assert( false ); - break; - - case 8: Scale = 3; break; - case 9: // becomes [reg*8+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 3; - break; + jASSUME( m_writeback_curpos < MaxWritebacks ); + m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) ); + m_writeback_curpos++; } -} - -ModSib::ModSib( const x86ModRm& src ) : - Base( src.Base ), - Index( src.Index ), - Scale( src.Factor ), - Displacement( src.Displacement ) -{ - Reduce(); -} - -ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) : - Base( base ), - Index( index ), - Scale( scale ), - Displacement( displacement ) -{ - Reduce(); -} - -ModSib::ModSib( s32 displacement ) : - Base(), - Index(), - Scale(0), - Displacement( displacement ) -{ -} - -// ------------------------------------------------------------------------ -// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the -// instruction ca be encoded as ModRm alone. -bool NeedsSibMagic( const ModSib& info ) -{ - // no registers? no sibs! 
- if( info.Index.IsEmpty() ) return false; - - // A scaled register needs a SIB - if( info.Scale != 0 ) return true; - - // two registers needs a SIB - if( !info.Base.IsEmpty() ) return true; - - // If index register is ESP, then we need a SIB: - // (the ModSib::Reduce() ensures that stand-alone ESP will be in the - // index position for us) - if( info.Index == esp ) return true; - - return false; -} - -// ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! -// -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. -// -void EmitSibMagic( int regfield, const ModSib& info ) -{ - int displacement_size = (info.Displacement == 0) ? 0 : - ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); - - if( !NeedsSibMagic( info ) ) + + void IssueWritebacks() const { - // Use ModRm-only encoding, with the rm field holding an index/base register, if - // one has been specified. If neither register is specified then use Disp32 form, - // which is encoded as "EBP w/o displacement" (which is why EBP must always be - // encoded *with* a displacement of 0, if it would otherwise not have one). - - if( info.Index.IsEmpty() ) - ModRM( 0, regfield, ModRm_UseDisp32 ); - else + const std::list::const_iterator& start = m_list_writebacks. + for( ; start!=end; start++ ) { - if( info.Index == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + Entry& current = *start; + u8* donespot = current.Commit(); + + // Copy the data from the m_nextinst to the current location, + // and update any additional writebacks (but what about multiple labels?!?) 
- ModRM( displacement_size, regfield, info.Index.Id ); } } - else - { - // In order to encode "just" index*scale (and no base), we have to encode - // it as a special [index*scale + displacement] form, which is done by - // specifying EBP as the base register and setting the displacement field - // to zero. (same as ModRm w/o SIB form above, basically, except the - // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). +}; +#endif - if( info.Base.IsEmpty() ) - { - ModRM( 0, regfield, ModRm_UseSib ); - SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); - displacement_size = 2; - } - else - { - if( info.Base == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Scale, info.Index.Id, info.Base.Id ); - } - } - - switch( displacement_size ) - { - case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; - jNO_DEFAULT - } +void JMP( x86Label& dest ) +{ + dest.AddWriteback( x86Ptr, emitJMP, 0 ); } +void JLE( x86Label& dest ) +{ + dest.AddWriteback( x86Ptr, emitJCC, 0 ); +} + +void x86SetJ8( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + *j8 = (u8)jump; +} + +void x86SetJ8A( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" 
); + assert(0); + } + + if( ((uptr)x86Ptr&0xf) > 4 ) { + + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); + + if( newjump <= 0x7f ) { + jump = newjump; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + } + } + *j8 = (u8)jump; +} + +emitterT void x86SetJ32( u32* j32 ) +{ + *j32 = ( x86Ptr - (u8*)j32 ) - 4; +} + +emitterT void x86SetJ32A( u32* j32 ) +{ + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); +} + +emitterT void x86Align( int bytes ) +{ + // forward align + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); +} +*/ + // ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! +// Internal implementation of EmitSibMagic which has been custom tailored +// to optimize special forms of the Lea instructions accordingly, such +// as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg". // -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. -// -emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info ) -{ - EmitSibMagic( regfield.Id, info ); -} - template< typename ToReg > -static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) +static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false ) { int displacement_size = (src.Displacement == 0) ? 0 : ( ( src.IsByteSizeDisp() ) ? 
1 : 2 ); @@ -348,17 +408,17 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) if( src.Index.IsEmpty() ) { if( is16bit ) - MOV16ItoR( to.Id, src.Displacement ); + MOV( to, src.Displacement ); else - MOV32ItoR( to.Id, src.Displacement ); + MOV( to, src.Displacement ); return; } else if( displacement_size == 0 ) { if( is16bit ) - MOV16RtoR( to.Id, src.Index.Id ); + MOV( to, ToReg( src.Index.Id ) ); else - MOV32RtoR( to.Id, src.Index.Id ); + MOV( to, ToReg( src.Index.Id ) ); return; } else @@ -366,7 +426,7 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) // note: no need to do ebp+0 check since we encode all 0 displacements as // register assignments above (via MOV) - write8( 0x8d ); + iWrite( 0x8d ); ModRM( displacement_size, to.Id, src.Index.Id ); } } @@ -377,115 +437,236 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) if( displacement_size == 0 ) { // Encode [Index*Scale] as a combination of Mov and Shl. - // This is more efficient because of the bloated format which requires - // a 32 bit displacement. + // This is more efficient because of the bloated LEA format which requires + // a 32 bit displacement, and the compact nature of the alterntive. + // + // (this does not apply to older model P4s with the broken barrel shifter, + // but we currently aren't optimizing for that target anyway). - if( is16bit ) - { - MOV16RtoR( to.Id, src.Index.Id ); - SHL16ItoR( to.Id, src.Scale ); - } - else - { - MOV32RtoR( to.Id, src.Index.Id ); - SHL32ItoR( to.Id, src.Scale ); - } + MOV( to, ToReg( src.Index.Id ) ); + SHL( to, src.Scale ); return; } - - write8( 0x8d ); + iWrite( 0x8d ); ModRM( 0, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); - displacement_size = 2; // force 32bit displacement. + iWrite( src.Displacement ); + return; } else { if( src.Base == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! 
- write8( 0x8d ); + iWrite( 0x8d ); ModRM( displacement_size, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, src.Base.Id ); + + /*switch( displacement_size ) + { + case 0: break; + case 1: emit.write( src.Displacement ); break; + case 2: emit.write( src.Displacement ); break; + jNO_DEFAULT + }*/ } } - - switch( displacement_size ) - { - case 0: break; - case 1: write8( src.Displacement ); break; - case 2: write32( src.Displacement ); break; - jNO_DEFAULT - } + if( displacement_size != 0 ) + { + *(u32*)x86Ptr = src.Displacement; + x86Ptr += (displacement_size == 1) ? 1 : 4; + } } -emitterT void LEA32( x86Register32 to, const ModSib& src ) +__emitinline void LEA( x86Register32 to, const ModSibBase& src ) { EmitLeaMagic( to, src ); } -emitterT void LEA16( x86Register16 to, const ModSib& src ) +__emitinline void LEA( x86Register16 to, const ModSibBase& src ) { - // fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form? - write8( 0x66 ); EmitLeaMagic( to, src ); } +////////////////////////////////////////////////////////////////////////////////////////// +// MOV instruction Implementation + +template< typename ImmType, typename SibMagicType > +class MovImpl +{ +public: + static const uint OperandSize = sizeof(ImmType); + +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + static __forceinline void Emit( const x86Register& to, const x86Register& from ) + { + if( to == from ) return; // ignore redundant MOVs. + + prefix16(); + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + ModRM( 3, from.Id, to.Id ); + } + + static __forceinline void Emit( const ModSibBase& dest, const x86Register& from ) + { + prefix16(); + + // mov eax has a special from when writing directly to a DISP32 address + // (sans any register index/base registers). + + if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) + { + iWrite( Is8BitOperand() ? 
0xa2 : 0xa3 ); + iWrite( dest.Displacement ); + } + else + { + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + SibMagicType::Emit( from.Id, dest ); + } + } + + static __forceinline void Emit( const x86Register& to, const ModSibBase& src ) + { + prefix16(); + + // mov eax has a special from when reading directly from a DISP32 address + // (sans any register index/base registers). + + if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) + { + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( src.Displacement ); + } + else + { + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + SibMagicType::Emit( to.Id, src ); + } + } + + static __forceinline void Emit( const x86Register& to, ImmType imm ) + { + // Note: MOV does not have (reg16/32,imm8) forms. + + if( imm == 0 ) + XOR( to, to ); + else + { + prefix16(); + iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + iWrite( imm ); + } + } + + static __forceinline void Emit( ModSibStrict dest, ImmType imm ) + { + prefix16(); + iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); + SibMagicType::Emit( 0, dest ); + iWrite( imm ); + } +}; + +namespace Internal +{ + typedef MovImpl MOV32; + typedef MovImpl MOV16; + typedef MovImpl MOV8; + + typedef MovImpl MOV32i; + typedef MovImpl MOV16i; + typedef MovImpl MOV8i; +} + +// Inlining Notes: +// I've set up the inlining to be as practical and intelligent as possible, which means +// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to +// virtually no code. In the case of (Reg, Imm) forms, the inlinign is up to the dis- +// cretion of the compiler. +// + +// TODO : Turn this into a macro after it's been debugged and accuracy-approved! 
:D
+
+// ---------- 32 Bit Interface -----------
+__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }	// reg <- reg
+__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }	// reg <- ptr32[src]
+__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }	// ptr32[dest] <- reg
+__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }	// ModSib operand <- reg
+__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }	// reg <- ModSib operand
+__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }	// ModSib operand <- imm32
+
+void MOV( const x86Register32& to, u32 imm ) { MOV32i::Emit( to, imm ); }	// reg <- imm32; no inline hint given, the compiler decides
+
+
+// ---------- 16 Bit Interface ----------- (same seven operand forms as above, routed to MOV16i / MOV16)
+__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
+__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
+__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
+__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
+__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
+__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
+
+void MOV( const x86Register16& to, u16 imm ) { MOV16i::Emit( to, imm ); }
+
+
+// ---------- 8 Bit Interface ----------- (same seven operand forms as above, routed to MOV8i / MOV8)
+__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
+__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
+__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
+__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
+__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
+__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
+
+void MOV( const x86Register8& to, u8 imm ) { MOV8i::Emit( to, imm ); }
+
+
 //////////////////////////////////////////////////////////////////////////////////////////
 // Miscellaneous Section!
 // Various Instructions with no parameter and no special encoding logic.
 //
-emitterT void RET() { write8( 0xC3 ); }
-emitterT void CBW() { write16( 0x9866 ); }
-emitterT void CWD() { write8( 0x98 ); }
-emitterT void CDQ() { write8( 0x99 ); }
-emitterT void CWDE() { write8( 0x98 ); }
+__forceinline void RET()	{ write8( 0xC3 ); }		// C3: near return
+__forceinline void CBW()	{ write16( 0x9866 ); }	// 66 98: sign-extend AL into AX (u16 is stored low byte first, so the 66 prefix comes out first)
+__forceinline void CWD()	{ write16( 0x9966 ); }	// 66 99: sign-extend AX into DX:AX.  Was write8( 0x98 ), which is the CWDE encoding.
+__forceinline void CDQ()	{ write8( 0x99 ); }		// 99: sign-extend EAX into EDX:EAX
+__forceinline void CWDE()	{ write8( 0x98 ); }		// 98: sign-extend AX into EAX

-emitterT void LAHF() { write8( 0x9f ); }
-emitterT void SAHF() { write8( 0x9e ); }
+__forceinline void LAHF()	{ write8( 0x9f ); }		// 9F: load status flags into AH
+__forceinline void SAHF()	{ write8( 0x9e ); }		// 9E: store AH into status flags

 //////////////////////////////////////////////////////////////////////////////////////////
 // Push / Pop Emitters
 //
-// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu
-// is running in. So even thought these say push32, they would essentially be push64 on
-// an x64 build. Should I rename them accordingly? --air
-//
 // Note: pushad/popad implementations are intentionally left out. The instructions are
 // invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
-emitterT void POP( x86Register32 from )
+__forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); }	// 58+r: register id is folded into the opcode byte
+
+__emitinline void POP( const ModSibBase& from )
 {
-	write8( 0x58 | from.Id );
+	iWrite<u8>( 0x8f ); Internal::EmitSibMagic( 0, from );	// explicit <u8>: exactly one opcode byte (8F), as the write8 call it replaces; then ModSib with extension 0
 }

-emitterT void POP( const ModSib& from )
-{
-	write8( 0x8f ); EmitSibMagic( 0, from );
-}
+__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }	// 68 followed by the 32-bit immediate
+__forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); }	// 50+r: register id is folded into the opcode byte

-emitterT void PUSH( u32 imm )
+__emitinline void PUSH( const ModSibBase& from )
 {
-	write8( 0x68 ); write32( imm );
-}
-
-emitterT void PUSH( x86Register32 from )
-{
-	write8( 0x50 | from.Id );
-}
-
-emitterT void PUSH( const ModSib& from )
-{
-	write8( 0xff ); EmitSibMagic( 6, from );
+	iWrite<u8>( 0xff ); Internal::EmitSibMagic( 6, from );	// explicit <u8>: exactly one opcode byte (FF), as the write8 call it replaces; then ModSib with extension 6
 }

 // pushes the EFLAGS register onto the stack
-emitterT void PUSHFD() { write8( 0x9C ); }
+__forceinline void PUSHFD() { write8( 0x9C ); }
 // pops the EFLAGS register from the stack
-emitterT void POPFD() { write8( 0x9D ); }
+__forceinline void POPFD() { write8( 0x9D ); }

 }
diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h
index bf89a17853..6f2cb31575 100644
--- a/pcsx2/x86/ix86/ix86.h
+++ b/pcsx2/x86/ix86/ix86.h
@@ -15,1512 +15,49 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  */
+
 /*
- * ix86 definitions v0.6.2
- * Authors: linuzappz
- * alexey silinov
- * goldfinger
- * shadow < shadow@pcsx2.net >
- * cottonvibes(@gmail.com)
+ * ix86 public header v0.9.0
+ *
+ * Original Authors (v0.6.2 and prior):
+ * linuzappz
+ * alexey silinov
+ * goldfinger
+ * zerofrog(@gmail.com)
+ *
+ * Authors of v0.9.0:
+ * Jake.Stine(@gmail.com)
+ * cottonvibes(@gmail.com)
+ * sudonim(1@gmail.com)
  */
-#pragma once
-
-#include "ix86_types.h"
-
-#ifdef _MSC_VER
-#define __threadlocal __declspec(thread)
-#else
-#define __threadlocal __thread
-#endif
-
-#define MMXONLY(code) code
-
-
-//------------------------------------------------------------------
-// write functions
-//------------------------------------------------------------------
-#define emitterT __forceinline
-
-extern __threadlocal u8 *x86Ptr;
-extern __threadlocal u8 *j8Ptr[32];
-extern __threadlocal u32 *j32Ptr[32];
-
-template< typename T >
-static emitterT void x86write( T val )
-{
-	*(T*)x86Ptr = val;
-	x86Ptr += sizeof(T);
-}
-
-static emitterT void write8( u8 val )
-{
-	*x86Ptr = val;
-	x86Ptr++;
-}
-
-static emitterT void write16( u16 val )
-{
-	*(u16*)x86Ptr = val;
-	x86Ptr += 2;
-}
-
-static emitterT void write24( u32 val )
-{
-	*x86Ptr++ = (u8)(val & 0xff);
-	*x86Ptr++ = (u8)((val >> 8) & 0xff);
-	*x86Ptr++ = (u8)((val >> 16) & 0xff);
-}
-
-static emitterT void write32( u32 val )
-{
-	*(u32*)x86Ptr = val;
-	x86Ptr += 4;
-}
-
-static emitterT void write64( u64 val )
-{
-	*(u64*)x86Ptr = val;
-	x86Ptr += 8;
-}
-
-//------------------------------------------------------------------
-
-//------------------------------------------------------------------
-// jump/align functions
-//------------------------------------------------------------------
-extern void x86SetPtr( u8 *ptr );
-extern void x86SetJ8( u8 *j8 );
-extern void x86SetJ8A( u8 *j8 );
-extern void x86SetJ16( u16 *j16 );
-extern void x86SetJ16A( u16 *j16 );
-extern void x86SetJ32( u32 *j32 );
-extern void x86SetJ32A( u32 *j32 );
-extern void x86Align( int bytes );
-extern void x86AlignExecutable( int align );
-//------------------------------------------------------------------
-
 //////////////////////////////////////////////////////////////////////////////////////////
 // New C++ Emitter!
 //
 // To use it just include the x86Emitter namespace into your file/class/function off choice.
- -namespace x86Emitter -{ - extern void POP( x86Register32 from ); - extern void POP( const ModSib& from ); - - extern void PUSH( u32 imm ); - extern void PUSH( x86Register32 from ); - extern void PUSH( const ModSib& from ); - - extern void LEA32( x86Register32 to, const ModSib& src ); - extern void LEA16( x86Register16 to, const ModSib& src ); - - - static __forceinline void POP( void* from ) { POP( ptr[from] ); } - static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } - - #define DECLARE_GROUP1_OPCODE_HELPER( lwr, bits ) \ - emitterT void lwr##bits( x86Register##bits to, x86Register##bits from ); \ - emitterT void lwr##bits( x86Register##bits to, void* from ); \ - emitterT void lwr##bits( x86Register##bits to, const ModSib& from ); \ - emitterT void lwr##bits( x86Register##bits to, u##bits imm ); \ - emitterT void lwr##bits( const ModSib& to, x86Register##bits from ); \ - emitterT void lwr##bits( void* to, x86Register##bits from ); \ - emitterT void lwr##bits( void* to, u##bits imm ); \ - emitterT void lwr##bits( const ModSib& to, u##bits imm ); - - #define DECLARE_GROUP1_OPCODE( lwr ) \ - DECLARE_GROUP1_OPCODE_HELPER( lwr, 32 ) - DECLARE_GROUP1_OPCODE_HELPER( lwr, 16 ) - DECLARE_GROUP1_OPCODE_HELPER( lwr, 8 ) - - DECLARE_GROUP1_OPCODE( ADD ) - DECLARE_GROUP1_OPCODE( CMP ) - DECLARE_GROUP1_OPCODE( OR ) - DECLARE_GROUP1_OPCODE( ADC ) - DECLARE_GROUP1_OPCODE( SBB ) - DECLARE_GROUP1_OPCODE( AND ) - DECLARE_GROUP1_OPCODE( SUB ) - DECLARE_GROUP1_OPCODE( XOR ) - -} - - -extern void CLC( void ); -extern void NOP( void ); - -//////////////////////////////////// -// mov instructions // -//////////////////////////////////// - -// mov r32 to r32 -extern void MOV32RtoR( x86IntRegType to, x86IntRegType from ); -// mov r32 to m32 -extern void MOV32RtoM( uptr to, x86IntRegType from ); -// mov m32 to r32 -extern void MOV32MtoR( x86IntRegType to, uptr from ); -// mov [r32] to r32 -extern void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ); -// mov 
[r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack -extern void FSUBP( void ); -// fmul ST(src) to fpu reg stack ST(0) -extern void FMUL32Rto0( x86IntRegType src ); -// fmul ST(0) to fpu reg stack ST(src) -extern void FMUL320toR( x86IntRegType src ); -// fdiv ST(src) to fpu reg stack ST(0) -extern void FDIV32Rto0( x86IntRegType src ); -// fdiv ST(0) to fpu reg stack ST(src) -extern void FDIV320toR( x86IntRegType src ); -// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) -extern void FDIV320toRP( x86IntRegType src ); - -// fadd m32 to fpu reg stack -extern void FADD32( u32 from ); -// fsub m32 to fpu reg stack -extern void FSUB32( u32 from ); -// fmul m32 to fpu reg stack -extern void FMUL32( u32 from ); -// fdiv m32 to fpu reg stack -extern void FDIV32( u32 from ); -// fcomi st, st( i) -extern void FCOMI( x86IntRegType src ); -// fcomip st, st( i) -extern void FCOMIP( x86IntRegType src ); -// fucomi st, st( i) -extern void FUCOMI( x86IntRegType src ); -// fucomip st, st( i) -extern void FUCOMIP( x86IntRegType src ); -// fcom m32 to fpu reg stack -extern void FCOM32( u32 from ); -// fabs fpu reg stack -extern void FABS( void ); -// fsqrt fpu reg stack -extern void FSQRT( void ); -// ftan fpu reg stack -extern void FPATAN( void ); -// fsin fpu reg stack -extern void FSIN( void ); -// fchs fpu reg stack -extern void FCHS( void ); - -// fcmovb fpu reg to fpu reg stack -extern void FCMOVB32( x86IntRegType from ); -// fcmove fpu reg to fpu reg stack -extern void FCMOVE32( x86IntRegType from ); -// fcmovbe fpu reg to fpu reg stack -extern void FCMOVBE32( x86IntRegType from ); -// fcmovu fpu reg to fpu reg stack -extern void FCMOVU32( x86IntRegType from ); -// fcmovnb fpu reg to fpu reg stack -extern void FCMOVNB32( x86IntRegType from ); -// fcmovne fpu reg to fpu reg stack -extern void FCMOVNE32( x86IntRegType from ); -// fcmovnbe fpu reg to fpu reg stack -extern void FCMOVNBE32( x86IntRegType from ); -// fcmovnu fpu reg to fpu reg stack 
-extern void FCMOVNU32( x86IntRegType from ); -extern void FCOMP32( u32 from ); -extern void FNSTSWtoAX( void ); - -#define MMXONLY(code) code - -//****************** -// MMX instructions -//****************** - -// r64 = mm - -// movq m64 to r64 -extern void MOVQMtoR( x86MMXRegType to, uptr from ); -// movq r64 to m64 -extern void MOVQRtoM( uptr to, x86MMXRegType from ); - -// pand r64 to r64 -extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); -// pand m64 to r64 ; -extern void PANDMtoR( x86MMXRegType to, uptr from ); -// pandn r64 to r64 -extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); -// pandn r64 to r64 -extern void PANDNMtoR( x86MMXRegType to, uptr from ); -// por r64 to r64 -extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); -// por m64 to r64 -extern void PORMtoR( x86MMXRegType to, uptr from ); -// pxor r64 to r64 -extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); -// pxor m64 to r64 -extern void PXORMtoR( x86MMXRegType to, uptr from ); - -// psllq r64 to r64 -extern void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); -// psllq m64 to r64 -extern void PSLLQMtoR( x86MMXRegType to, uptr from ); -// psllq imm8 to r64 -extern void PSLLQItoR( x86MMXRegType to, u8 from ); -// psrlq r64 to r64 -extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); -// psrlq m64 to r64 -extern void PSRLQMtoR( x86MMXRegType to, uptr from ); -// psrlq imm8 to r64 -extern void PSRLQItoR( x86MMXRegType to, u8 from ); - -// paddusb r64 to r64 -extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddusb m64 to r64 -extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); -// paddusw r64 to r64 -extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddusw m64 to r64 -extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); - -// paddb r64 to r64 -extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddb m64 to r64 
-extern void PADDBMtoR( x86MMXRegType to, uptr from ); -// paddw r64 to r64 -extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddw m64 to r64 -extern void PADDWMtoR( x86MMXRegType to, uptr from ); -// paddd r64 to r64 -extern void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddd m64 to r64 -extern void PADDDMtoR( x86MMXRegType to, uptr from ); -extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); - -// paddq m64 to r64 (sse2 only?) -extern void PADDQMtoR( x86MMXRegType to, uptr from ); -// paddq r64 to r64 (sse2 only?) -extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); - -extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); - -extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBDMtoR( x86MMXRegType to, uptr from ); - -// psubq m64 to r64 (sse2 only?) -extern void PSUBQMtoR( x86MMXRegType to, uptr from ); -// psubq r64 to r64 (sse2 only?) -extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); - -// pmuludq m64 to r64 (sse2 only?) -extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); -// pmuludq r64 to r64 (sse2 only?) 
-extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); - -extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); -extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); -extern void PSRLWItoR( x86MMXRegType to, u8 from ); -extern void PSRLDItoR( x86MMXRegType to, u8 from ); -extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSLLWItoR( x86MMXRegType to, u8 from ); -extern void PSLLDItoR( x86MMXRegType to, u8 from ); -extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSRAWItoR( x86MMXRegType to, u8 from ); -extern void PSRADItoR( x86MMXRegType to, u8 from ); -extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); -extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); -extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 -extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); -extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); -extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); -extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); -extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); -extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); -extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType 
from ); -extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); -extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); -extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); -extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); -extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); - -// emms -extern void EMMS( void ); - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits -//********************************************************************************** -extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); -extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); - -extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); - -//********************* -// SSE instructions * -//********************* -extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); -extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); - -extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); -extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); -extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); -extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, 
x86IntRegType from, int offset=0 ); -extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); -extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); -extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); -extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); - -extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); -extern void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); -extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); - -extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); - -extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); -extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); - -extern void 
SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); - -extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void 
SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); -extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); -extern void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); -extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); -extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); - -extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); -extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); -extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); -extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); - -extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); -extern void 
SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE_SHUFPS_M128_to_XMM( 
x86SSERegType to, uptr from, u8 imm8 ); -extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); -extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -// VectorPath -extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); -extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType 
to, x86SSERegType from, u8 imm8 ); -extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -extern void SSE_STMXCSR( uptr from ); -extern void SSE_LDMXCSR( uptr from ); - - -//********************* -// SSE 2 Instructions* -//********************* - -extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); -extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); - -extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); -extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); - -extern void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); -extern void 
SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); -extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); - -extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void 
SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word -//********************************************************************************** -extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void 
SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); - -//**********************************************************************************/ -//PUNPCKHWD: Unpack 16bit high -//********************************************************************************** -extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); - -// mult by half words -extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); - - 
-//**********************************************************************************/ -//PMOVMSKB: Create 16bit mask from signs of 8bit integers -//********************************************************************************** -extern void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); - -extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); -extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); - -//**********************************************************************************/ -//PEXTRW,PINSRW: Packed Extract/Insert Word * -//********************************************************************************** -extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); -extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); - - -//**********************************************************************************/ -//PSUBx: Subtract Packed Integers * -//********************************************************************************** -extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PCMPxx: Compare Packed Integers * -//********************************************************************************** -extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from ); -extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); - -//**********************************************************************************/ -//MOVD: Move Dword(32bit) to /from XMM reg * -//********************************************************************************** -extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); -extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); -extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); -extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); -extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); -extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); - 
-//**********************************************************************************/ -//MOVD: Move Qword(64bit) to/from MMX/XMM reg * -//********************************************************************************** -extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); -extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); - - -//**********************************************************************************/ -//POR : SSE Bitwise OR * -//********************************************************************************** -extern void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); - -extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); - -// SSSE3 - -extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); - -// SSE4.1 - -#ifndef _MM_MK_INSERTPS_NDX -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -#endif - -extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); -extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); -extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); -extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); -extern void 
SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); -extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); -extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); - -//********************* -// 3DNOW instructions * -//********************* -extern void FEMMS( void ); -extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); -extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); -extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); -extern void PFADDMtoR( x86IntRegType to, uptr from ); -extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFSUBMtoR( x86IntRegType to, uptr from ); -extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFMULMtoR( x86IntRegType to, uptr from ); -extern void PFMULRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFRCPMtoR( x86IntRegType to, uptr from ); -extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); -extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); -extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); -extern void 
PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); -extern void PF2IDMtoR( x86IntRegType to, uptr from ); -extern void PI2FDMtoR( x86IntRegType to, uptr from ); -extern void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFMAXMtoR( x86IntRegType to, uptr from ); -extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFMINMtoR( x86IntRegType to, uptr from ); -extern void PFMINRtoR( x86IntRegType to, x86IntRegType from ); +// +// This header file is intended for use by public code. It includes the appropriate +// inlines and class definitions for efficient codegen. (code internal to the emitter +// should usually use ix86_internal.h instead, and manually include the +// ix86_inlines.inl file when it is known that inlining of ModSib functions are +// wanted). +// +// +// Important when Using the New Emitter: +// Make sure there is *no* data in use or of importance past the end of the +// current x86Ptr. Ie, don't do fancy x86Ptr rewind tricks of your own. The +// emitter uses optimized writes which will clobber data past the end of the +// instruction it's emitting, so even if you know for sure the instruction you +// are writing is 5 bytes, the emitter will likely emit 9 bytes and the re- +// wind the x86Ptr to the end of the instruction. +// + +#pragma once + +#include "ix86_types.h" +#include "ix86_instructions.h" + +// Including legacy items for now, but these should be removed eventually, +// once most code is no longer dependent on them. 
+#include "ix86_legacy_types.h" +#include "ix86_legacy_instructions.h" diff --git a/pcsx2/x86/ix86/ix86_3dnow.cpp b/pcsx2/x86/ix86/ix86_3dnow.cpp index ae6743cc3d..4f053ff37c 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.cpp +++ b/pcsx2/x86/ix86/ix86_3dnow.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" //------------------------------------------------------------------ // 3DNOW instructions diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index b1fc04a96e..083d72e1b2 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -18,9 +18,8 @@ #include "PrecompiledHeader.h" -#include "ix86_internal.h" #include "System.h" -#include "Threading.h" +#include "ix86_legacy_internal.h" #include "RedtapeWindows.h" diff --git a/pcsx2/x86/ix86/ix86_fpu.cpp b/pcsx2/x86/ix86/ix86_fpu.cpp index d7e3a65963..f3f9631e01 100644 --- a/pcsx2/x86/ix86/ix86_fpu.cpp +++ b/pcsx2/x86/ix86/ix86_fpu.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" //------------------------------------------------------------------ // FPU instructions diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl new file mode 100644 index 0000000000..69cc3b03f5 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -0,0 +1,237 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* + * ix86 core v0.9.0 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#pragma once + +// This header module contains functions which, under most circumstances, inline +// nicely with constant propagation from the compiler, resulting in little or +// no actual codegen in the majority of emitter statements. (common forms include: +// RegToReg, PointerToReg, RegToPointer). These cannot be included in the class +// definitions in the .h file because of inter-dependencies with other classes. +// (score one for C++!!) +// +// In order for MSVC to work correctly with __forceinline on class members, +// however, we need to include these methods into all source files which might +// reference them. Without this MSVC generates linker errors. Or, in other words, +// global optimization fails to resolve the externals and junk. +// (score one for MSVC!) 
+ +namespace x86Emitter +{ + ////////////////////////////////////////////////////////////////////////////////////////// + // x86Register Method Implementations + // + __forceinline x86AddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const + { + return x86AddressInfo( *this, right ); + } + + __forceinline x86AddressInfo x86IndexReg::operator+( const x86AddressInfo& right ) const + { + return right + *this; + } + + __forceinline x86AddressInfo x86IndexReg::operator+( s32 right ) const + { + return x86AddressInfo( *this, right ); + } + + __forceinline x86AddressInfo x86IndexReg::operator*( u32 right ) const + { + return x86AddressInfo( Empty, *this, right ); + } + + __forceinline x86AddressInfo x86IndexReg::operator<<( u32 shift ) const + { + return x86AddressInfo( Empty, *this, 1< + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#pragma once + +namespace x86Emitter +{ + // ----- Lea Instructions (Load Effective Address) ----- + // Note: alternate (void*) forms of these instructions are not provided since those + // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs + // instead. 
+ + extern void LEA( x86Register32 to, const ModSibBase& src ); + extern void LEA( x86Register16 to, const ModSibBase& src ); + + // ----- Push / Pop Instructions ----- + + extern void POP( x86Register32 from ); + extern void POP( const ModSibBase& from ); + + extern void PUSH( u32 imm ); + extern void PUSH( x86Register32 from ); + extern void PUSH( const ModSibBase& from ); + + static __forceinline void POP( void* from ) { POP( ptr[from] ); } + static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } + + // ------------------------------------------------------------------------ + using Internal::ADD; + using Internal::OR; + using Internal::ADC; + using Internal::SBB; + using Internal::AND; + using Internal::SUB; + using Internal::XOR; + using Internal::CMP; + + using Internal::ROL; + using Internal::ROR; + using Internal::RCL; + using Internal::RCR; + using Internal::SHL; + using Internal::SHR; + using Internal::SAR; + + // ---------- 32 Bit Interface ----------- + extern void MOV( const x86Register32& to, const x86Register32& from ); + extern void MOV( const ModSibBase& sibdest, const x86Register32& from ); + extern void MOV( const x86Register32& to, const ModSibBase& sibsrc ); + extern void MOV( const x86Register32& to, const void* src ); + extern void MOV( const void* dest, const x86Register32& from ); + + extern void MOV( const x86Register32& to, u32 imm ); + extern void MOV( const ModSibStrict<4>& sibdest, u32 imm ); + + // ---------- 16 Bit Interface ----------- + extern void MOV( const x86Register16& to, const x86Register16& from ); + extern void MOV( const ModSibBase& sibdest, const x86Register16& from ); + extern void MOV( const x86Register16& to, const ModSibBase& sibsrc ); + extern void MOV( const x86Register16& to, const void* src ); + extern void MOV( const void* dest, const x86Register16& from ); + + extern void MOV( const x86Register16& to, u16 imm ); + extern void MOV( const ModSibStrict<2>& sibdest, u16 imm ); + + // ---------- 8 Bit 
Interface ----------- + extern void MOV( const x86Register8& to, const x86Register8& from ); + extern void MOV( const ModSibBase& sibdest, const x86Register8& from ); + extern void MOV( const x86Register8& to, const ModSibBase& sibsrc ); + extern void MOV( const x86Register8& to, const void* src ); + extern void MOV( const void* dest, const x86Register8& from ); + + extern void MOV( const x86Register8& to, u8 imm ); + extern void MOV( const ModSibStrict<1>& sibdest, u8 imm ); + +} + diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h index 4f3f72f2e4..b8ed9269ca 100644 --- a/pcsx2/x86/ix86/ix86_internal.h +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -1,43 +1,22 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ #pragma once -#include "ix86.h" -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ - -#define MEMADDR(addr, oplen) (addr) - -#define Rex(w,r,x,b) assert(0) -#define RexR(w, reg) assert( !(w || (reg)>=8) ) -#define RexB(w, base) assert( !(w || (base)>=8) ) -#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) -#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) - -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) - -static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) -static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - - -//------------------------------------------------------------------ -// General Emitter Helper functions -//------------------------------------------------------------------ - -namespace x86Emitter -{ - extern void EmitSibMagic( int regfield, const ModSib& info ); - extern void EmitSibMagic( x86Register32 regfield, const ModSib& info ); - extern bool NeedsSibMagic( const ModSib& info ); -} - -// From here out are the legacy (old) emitter functions... 
- -extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); -extern void ModRM( int mod, int reg, int rm ); -extern void SibSB( int ss, int index, int base ); -extern void SET8R( int cc, int to ); -extern u8* J8Rel( int cc, int to ); -extern u32* J32Rel( int cc, u32 to ); -extern u64 GetCPUTick( void ); -//------------------------------------------------------------------ +#include "ix86_types.h" +#include "ix86_instructions.h" diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index eec040ac33..722aba5089 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -30,10 +30,119 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" using namespace x86Emitter; +template< int OperandSize > +static __forceinline x86Register _reghlp( x86IntRegType src ) +{ + return x86Register( src ); +} + +static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) +{ + return ptr[_reghlp<4>(src)]; +} + +template< int OperandSize > +static __forceinline ModSibStrict _mhlp( x86IntRegType src ) +{ + return ModSibStrict( x86IndexReg::Empty, x86IndexReg(src) ); +} + +template< int OperandSize > +static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) +{ + return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); +} + +#define DEFINE_LEGACY_HELPER( cod, bits ) \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int 
offset ) { cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp(to), _mhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp(to) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ + { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ + { cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } + +#define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ + emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \ + emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp(to) + offset, cl ); } + +//emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ +// { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + +#define DEFINE_OPCODE_LEGACY( cod ) \ + DEFINE_LEGACY_HELPER( cod, 32 ) \ + DEFINE_LEGACY_HELPER( cod, 16 ) \ + DEFINE_LEGACY_HELPER( cod, 8 ) + +#define DEFINE_OPCODE_SHIFT_LEGACY( cod ) \ + DEFINE_LEGACY_SHIFT_HELPER( cod, 32 ) \ + DEFINE_LEGACY_SHIFT_HELPER( cod, 16 ) \ + DEFINE_LEGACY_SHIFT_HELPER( cod, 8 ) + +////////////////////////////////////////////////////////////////////////////////////////// +// +DEFINE_OPCODE_LEGACY( ADD ) +DEFINE_OPCODE_LEGACY( CMP ) +DEFINE_OPCODE_LEGACY( OR ) +DEFINE_OPCODE_LEGACY( ADC ) +DEFINE_OPCODE_LEGACY( SBB ) +DEFINE_OPCODE_LEGACY( AND ) 
+DEFINE_OPCODE_LEGACY( SUB ) +DEFINE_OPCODE_LEGACY( XOR ) + +DEFINE_OPCODE_SHIFT_LEGACY( ROL ) +DEFINE_OPCODE_SHIFT_LEGACY( ROR ) +DEFINE_OPCODE_SHIFT_LEGACY( RCL ) +DEFINE_OPCODE_SHIFT_LEGACY( RCR ) +DEFINE_OPCODE_SHIFT_LEGACY( SHL ) +DEFINE_OPCODE_SHIFT_LEGACY( SHR ) +DEFINE_OPCODE_SHIFT_LEGACY( SAR ) + +DEFINE_OPCODE_LEGACY( MOV ) + +// mov r32 to [r32<(to), from ); +} + +emitterT void AND32I8toM( uptr to, s8 from ) +{ + AND( ptr8[to], from ); +} + + + // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. @@ -70,16 +179,6 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset } } -emitterT void ModRM( s32 mod, s32 reg, s32 rm ) -{ - write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); -} - -emitterT void SibSB( s32 ss, s32 index, s32 base ) -{ - write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); -} - emitterT void SET8R( int cc, int to ) { RexB(0, to); @@ -191,43 +290,6 @@ emitterT void x86Align( int bytes ) x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); } -//////////////////////////////////////////////////// -// Generates executable code to align to the given alignment (could be useful for the second leg -// of if/else conditionals, which usually fall through a jump target label). -// -// Note: Left in for now just in case, but usefulness is moot. Only K8's and older (non-Prescott) -// P4s benefit from this, and we don't optimize for those platforms anyway. -// -void x86AlignExecutable( int align ) -{ - uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); - uptr bytes = ( newx86 - (uptr)x86Ptr ); - - switch( bytes ) - { - case 0: break; - - case 1: NOP(); break; - case 2: MOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: NOP(); // falls through to 4... 
- case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: NOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; - - default: - { - // for larger alignments, just use a JMP... - u8* aligned_target = JMP8(0); - x86Ptr = (u8*)newx86; - x86SetJ8( aligned_target ); - } - } - - jASSUME( x86Ptr == (u8*)newx86 ); -} - /********************/ /* IX86 instructions */ /********************/ @@ -249,281 +311,6 @@ emitterT void NOP( void ) } -//////////////////////////////////// -// mov instructions / -//////////////////////////////////// - -/* mov r32 to r32 */ -emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) -{ - if( to == from ) return; - - RexRB(0, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); -} - -/* mov r32 to m32 */ -emitterT void MOV32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0, from); - if (from == EAX) { - write8(0xA3); - } else { - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - } - write32( MEMADDR(to, 4) ); -} - -/* mov m32 to r32 */ -emitterT void MOV32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0, to); - if (to == EAX) { - write8(0xA1); - } else { - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - } - write32( MEMADDR(from, 4) ); -} - -emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -/* mov [r32+r32*scale] to r32 */ -emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - RexRXB(0,to,from2,from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -// mov r32 to [r32< subtract ST(0) from ST(1), store in ST(1) and POP stack +extern void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +extern void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) +extern void FMUL320toR( x86IntRegType src ); +// 
fdiv ST(src) to fpu reg stack ST(0) +extern void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +extern void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +extern void FDIV320toRP( x86IntRegType src ); + +// fadd m32 to fpu reg stack +extern void FADD32( u32 from ); +// fsub m32 to fpu reg stack +extern void FSUB32( u32 from ); +// fmul m32 to fpu reg stack +extern void FMUL32( u32 from ); +// fdiv m32 to fpu reg stack +extern void FDIV32( u32 from ); +// fcomi st, st( i) +extern void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +extern void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +extern void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +extern void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +extern void FCOM32( u32 from ); +// fabs fpu reg stack +extern void FABS( void ); +// fsqrt fpu reg stack +extern void FSQRT( void ); +// ftan fpu reg stack +extern void FPATAN( void ); +// fsin fpu reg stack +extern void FSIN( void ); +// fchs fpu reg stack +extern void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +extern void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +extern void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +extern void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +extern void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +extern void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +extern void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +extern void FCMOVNBE32( x86IntRegType from ); +// fcmovnu fpu reg to fpu reg stack +extern void FCMOVNU32( x86IntRegType from ); +extern void FCOMP32( u32 from ); +extern void FNSTSWtoAX( void ); + +#define MMXONLY(code) code + +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +extern void MOVQMtoR( 
x86MMXRegType to, uptr from ); +// movq r64 to m64 +extern void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 ; +extern void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn r64 to r64 +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn m64 to r64 +extern void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +extern void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +extern void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +extern void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +extern void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +extern void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +extern void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +extern void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to r64 +extern void PADDBMtoR( x86MMXRegType to, uptr from ); +// paddw r64 to r64 +extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +extern void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +extern void PADDDRtoR( x86MMXRegType 
to, x86MMXRegType from ); +// paddd m64 to r64 +extern void PADDDMtoR( x86MMXRegType to, uptr from ); +extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +extern void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) +extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +extern void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) 
+extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +extern void PSRLWItoR( x86MMXRegType to, u8 from ); +extern void PSRLDItoR( x86MMXRegType to, u8 from ); +extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSLLWItoR( x86MMXRegType to, u8 from ); +extern void PSLLDItoR( x86MMXRegType to, u8 from ); +extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSRAWItoR( x86MMXRegType to, u8 from ); +extern void PSRADItoR( x86MMXRegType to, u8 from ); +extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); +extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); +extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType 
from ); +extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +extern void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +//********************* +// SSE instructions * +//********************* +extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, 
x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void 
SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void 
SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); +extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); + +extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_SHUFPS_M128_to_XMM( 
x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType 
to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE_STMXCSR( uptr from ); +extern void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 Instructions* +//********************* + +extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +extern void 
SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); +extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void 
SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + 
+//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +extern void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * +//********************************************************************************** +extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, 
x86SSERegType from ); +extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + 
+//**********************************************************************************/ +//MOVDQ2Q,MOVQ2DQ: Move Qword(64bit) between MMX and XMM reg * +//********************************************************************************** +extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +extern void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); + +// SSSE3 + +extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); + +// SSE4.1 + +#ifndef _MM_MK_INSERTPS_NDX +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) +#endif + +extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); +extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); +extern void 
SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); +extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +//********************* +// 3DNOW instructions * +//********************* +extern void FEMMS( void ); +extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); +extern void PFADDMtoR( x86IntRegType to, uptr from ); +extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFSUBMtoR( x86IntRegType to, uptr from ); +extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMULMtoR( x86IntRegType to, uptr from ); +extern void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPMtoR( x86IntRegType to, uptr from ); +extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +extern void 
PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PF2IDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMAXMtoR( x86IntRegType to, uptr from ); +extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMINMtoR( x86IntRegType to, uptr from ); +extern void PFMINRtoR( x86IntRegType to, x86IntRegType from ); + diff --git a/pcsx2/x86/ix86/ix86_legacy_internal.h b/pcsx2/x86/ix86/ix86_legacy_internal.h new file mode 100644 index 0000000000..92aab168f0 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_legacy_internal.h @@ -0,0 +1,78 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#include "ix86_internal.h" + +//------------------------------------------------------------------ +// Legacy Helper Macros and Functions (deprecated) +//------------------------------------------------------------------ + +#include "ix86_legacy_types.h" +#include "ix86_legacy_instructions.h" + +#define MEMADDR(addr, oplen) (addr) + +#define Rex(w,r,x,b) assert(0) +#define RexR(w, reg) assert( !(w || (reg)>=8) ) +#define RexB(w, base) assert( !(w || (base)>=8) ) +#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) +#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) + +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) + +extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); +extern void ModRM( uint mod, uint reg, uint rm ); +extern void SibSB( uint ss, uint index, uint base ); +extern void SET8R( int cc, int to ); +extern u8* J8Rel( int cc, int to ); +extern u32* J32Rel( int cc, u32 to ); +extern u64 GetCPUTick( void ); + + +////////////////////////////////////////////////////////////////////////////////////////// +// +emitterT void ModRM( uint mod, uint reg, uint rm ) +{ + // Note: Following ASSUMEs are for legacy support only. + // The new emitter performs these sanity checks during operand construction, so these + // assertions can probably be removed once all legacy emitter code has been removed.
+ jASSUME( mod < 4 ); + jASSUME( reg < 8 ); + jASSUME( rm < 8 ); + //write8( (mod << 6) | (reg << 3) | rm ); + + *x86Ptr = (u8)( (mod << 6) | (reg << 3) | rm ); // store exactly one byte; a 32-bit store here would also overwrite the three bytes after the cursor + x86Ptr++; + +} + +emitterT void SibSB( uint ss, uint index, uint base ) // emits one SIB byte at x86Ptr: ss in bits 7-6, index in bits 5-3, base in bits 2-0 +{ + // Note: Following ASSUMEs are for legacy support only. + // The new emitter performs these sanity checks during operand construction, so these + // assertions can probably be removed once all legacy emitter code has been removed. + jASSUME( ss < 4 ); + jASSUME( index < 8 ); + jASSUME( base < 8 ); + //write8( (ss << 6) | (index << 3) | base ); + + *x86Ptr = (u8)( (ss << 6) | (index << 3) | base ); // store exactly one byte; a 32-bit store here would also overwrite the three bytes after the cursor + x86Ptr++; +} diff --git a/pcsx2/x86/ix86/ix86_legacy_types.h b/pcsx2/x86/ix86/ix86_legacy_types.h new file mode 100644 index 0000000000..eb936c3366 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_legacy_types.h @@ -0,0 +1,140 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#define SIB 4 // maps to ESP +#define SIBDISP 5 // maps to EBP +#define DISP32 5 // maps to EBP + +// general types +typedef int x86IntRegType; + +#define EAX 0 +#define EBX 3 +#define ECX 1 +#define EDX 2 +#define ESI 6 +#define EDI 7 +#define EBP 5 +#define ESP 4 + +#define X86ARG1 EAX +#define X86ARG2 ECX +#define X86ARG3 EDX +#define X86ARG4 EBX + +#define MM0 0 +#define MM1 1 +#define MM2 2 +#define MM3 3 +#define MM4 4 +#define MM5 5 +#define MM6 6 +#define MM7 7 + +typedef int x86MMXRegType; + +#define XMM0 0 +#define XMM1 1 +#define XMM2 2 +#define XMM3 3 +#define XMM4 4 +#define XMM5 5 +#define XMM6 6 +#define XMM7 7 +#define XMM8 8 +#define XMM9 9 +#define XMM10 10 +#define XMM11 11 +#define XMM12 12 +#define XMM13 13 +#define XMM14 14 +#define XMM15 15 + +typedef int x86SSERegType; +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#define SIB 4 // maps to ESP +#define SIBDISP 5 // maps to EBP +#define DISP32 5 // maps to EBP + +// general types +typedef int x86IntRegType; + +#define EAX 0 +#define EBX 3 +#define ECX 1 +#define EDX 2 +#define ESI 6 +#define EDI 7 +#define EBP 5 +#define ESP 4 + +#define X86ARG1 EAX +#define X86ARG2 ECX +#define X86ARG3 EDX +#define X86ARG4 EBX + +#define MM0 0 +#define MM1 1 +#define MM2 2 +#define MM3 3 +#define MM4 4 +#define MM5 5 +#define MM6 6 +#define MM7 7 + +typedef int x86MMXRegType; + +#define XMM0 0 +#define XMM1 1 +#define XMM2 2 +#define XMM3 3 +#define XMM4 4 +#define XMM5 5 +#define XMM6 6 +#define XMM7 7 +#define XMM8 8 +#define XMM9 9 +#define XMM10 10 +#define XMM11 11 +#define XMM12 12 +#define XMM13 13 +#define XMM14 14 +#define XMM15 15 + +typedef int x86SSERegType; diff --git a/pcsx2/x86/ix86/ix86_mmx.cpp b/pcsx2/x86/ix86/ix86_mmx.cpp index 77f8f33c97..74abe3e5df 100644 --- a/pcsx2/x86/ix86/ix86_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_mmx.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" //------------------------------------------------------------------ // MMX instructions diff --git a/pcsx2/x86/ix86/ix86_sse.cpp b/pcsx2/x86/ix86/ix86_sse.cpp index ffeb51365b..6eb149dca3 100644 --- a/pcsx2/x86/ix86/ix86_sse.cpp +++ b/pcsx2/x86/ix86/ix86_sse.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" #include "ix86_sse_helpers.h" ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index 13028b7432..9218c76260 100644 --- a/pcsx2/x86/ix86/ix86_tools.cpp +++ 
b/pcsx2/x86/ix86/ix86_tools.cpp @@ -19,7 +19,7 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86/ix86.h" +#include "ix86.h" // used to make sure regs don't get changed while in recompiler // use FreezeMMXRegs, FreezeXMMRegs diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index ede42f92af..6c91b2a9d7 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -23,57 +23,6 @@ #define X86REGS 8 #define MMXREGS 8 -#define SIB 4 -#define SIBDISP 5 -#define DISP32 5 - -// general types -typedef int x86IntRegType; - -#define EAX 0 -#define EBX 3 -#define ECX 1 -#define EDX 2 -#define ESI 6 -#define EDI 7 -#define EBP 5 -#define ESP 4 - -#define X86ARG1 EAX -#define X86ARG2 ECX -#define X86ARG3 EDX -#define X86ARG4 EBX - -#define MM0 0 -#define MM1 1 -#define MM2 2 -#define MM3 3 -#define MM4 4 -#define MM5 5 -#define MM6 6 -#define MM7 7 - -typedef int x86MMXRegType; - -#define XMM0 0 -#define XMM1 1 -#define XMM2 2 -#define XMM3 3 -#define XMM4 4 -#define XMM5 5 -#define XMM6 6 -#define XMM7 7 -#define XMM8 8 -#define XMM9 9 -#define XMM10 10 -#define XMM11 11 -#define XMM12 12 -#define XMM13 13 -#define XMM14 14 -#define XMM15 15 - -typedef int x86SSERegType; - enum XMMSSEType { XMMT_INT = 0, // integer (sse2 only) @@ -149,104 +98,164 @@ struct CPUINFO{ }; extern CPUINFO cpuinfo; -//------------------------------------------------------------------ +//------------------------------------------------------------------ +#ifdef _MSC_VER +#define __threadlocal __declspec(thread) +#else +#define __threadlocal __thread +#endif + +extern __threadlocal u8 *x86Ptr; +extern __threadlocal u8 *j8Ptr[32]; +extern __threadlocal u32 *j32Ptr[32]; + + +//------------------------------------------------------------------ // templated version of is_s8 is required, so that u16's get correct sign extension treatment. 
template< typename T > static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } +template< typename T > +static __forceinline void iWrite( T val ) +{ + *(T*)x86Ptr = val; + x86Ptr += sizeof(T); +} + namespace x86Emitter { - class x86ModRm; - ////////////////////////////////////////////////////////////////////////////////////////// - // - struct x86Register32 +///////////////////////////////////////////////////////////////////////////////////////////// +// __emitline - preprocessors definition +// +// This is configured to inline emitter functions appropriately for release builds, and +// disables some of the more aggressive inlines for dev builds (which can be helpful when +// debugging). +// +// Note: I use __forceinline directly for most single-line class members, when needed. +// There's no point in using __emitline in these cases since the debugger can't trace into +// single-line functions anyway. +// +#ifdef PCSX2_DEVBUILD +#define __emitinline +#else +#define __emitinline __forceinline +#endif + +#ifdef _MSC_VER +# define __noinline __declspec(noinline) +#else +# define __noinline +#endif + + + static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) + static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) + + class x86AddressInfo; + class ModSibBase; + + static __forceinline void write8( u8 val ) { - static const x86Register32 Empty; // defined as an empty/unused value (-1) - - int Id; + iWrite( val ); + } - x86Register32( const x86Register32& src ) : Id( src.Id ) {} - x86Register32() : Id( -1 ) {} - explicit x86Register32( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + static __forceinline void write16( u16 val ) + { + iWrite( val ); + } - bool IsEmpty() const { return Id == -1; } + static __forceinline void write24( u32 val ) + { + *(u32*)x86Ptr = val; + x86Ptr += 3; + } - bool operator==( const x86Register32& src ) const { return Id == src.Id; } - bool operator!=( const 
x86Register32& src ) const { return Id != src.Id; } - - x86ModRm operator+( const x86Register32& right ) const; - x86ModRm operator+( const x86ModRm& right ) const; - x86ModRm operator+( s32 right ) const; + static __forceinline void write32( u32 val ) + { + iWrite( val ); + } + + static __forceinline void write64( u64 val ) + { + iWrite( val ); + } - x86ModRm operator*( u32 factor ) const; - - x86Register32& operator=( const x86Register32& src ) - { - Id = src.Id; - return *this; - } - }; - ////////////////////////////////////////////////////////////////////////////////////////// - // Similar to x86Register, but without the ability to add/combine them with ModSib. // - class x86Register16 + template< int OperandSize > + class x86Register { public: - static const x86Register16 Empty; + static const x86Register Empty; // defined as an empty/unused value (-1) int Id; - x86Register16( const x86Register16& src ) : Id( src.Id ) {} - x86Register16() : Id( -1 ) {} - explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + x86Register( const x86Register& src ) : Id( src.Id ) {} + x86Register(): Id( -1 ) {} + explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } - bool operator==( const x86Register16& src ) const { return Id == src.Id; } - bool operator!=( const x86Register16& src ) const { return Id != src.Id; } + // Returns true if the register is a valid accumulator: Eax, Ax, Al. 
+ bool IsAccumulator() const { return Id == 0; } - x86Register16& operator=( const x86Register16& src ) + bool operator==( const x86Register& src ) const + { + return (Id == src.Id); + } + + bool operator!=( const x86Register& src ) const + { + return (Id != src.Id); + } + + x86Register& operator=( const x86Register& src ) { Id = src.Id; return *this; } }; + typedef x86Register<4> x86Register32; + typedef x86Register<2> x86Register16; + typedef x86Register<1> x86Register8; + ////////////////////////////////////////////////////////////////////////////////////////// - // Similar to x86Register, but without the ability to add/combine them with ModSib. - // - class x86Register8 + // Use 32 bit registers as out index register (for ModSib memory address calculations) + // Only x86IndexReg provides operators for constructing x86AddressInfo types. + class x86IndexReg : public x86Register32 { public: - static const x86Register8 Empty; + static const x86IndexReg Empty; // defined as an empty/unused value (-1) + + public: + x86IndexReg(): x86Register32() {} + x86IndexReg( const x86IndexReg& src ) : x86Register32( src.Id ) {} + x86IndexReg( const x86Register32& src ) : x86Register32( src ) {} + explicit x86IndexReg( int regId ) : x86Register32( regId ) {} - int Id; + // Returns true if the register is the stack pointer: ESP. 
+ bool IsStackPointer() const { return Id == 4; } - x86Register8( const x86Register16& src ) : Id( src.Id ) {} - x86Register8() : Id( -1 ) {} - explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + x86AddressInfo operator+( const x86IndexReg& right ) const; + x86AddressInfo operator+( const x86AddressInfo& right ) const; + x86AddressInfo operator+( s32 right ) const; - bool IsEmpty() const { return Id == -1; } - - bool operator==( const x86Register8& src ) const { return Id == src.Id; } - bool operator!=( const x86Register8& src ) const { return Id != src.Id; } - - x86Register8& operator=( const x86Register8& src ) + x86AddressInfo operator*( u32 factor ) const; + x86AddressInfo operator<<( u32 shift ) const; + + x86IndexReg& operator=( const x86Register32& src ) { Id = src.Id; return *this; } }; - - // Use 32 bit registers as out index register (for ModSig memory address calculations) - typedef x86Register32 x86IndexReg; ////////////////////////////////////////////////////////////////////////////////////////// // - class x86ModRm + class x86AddressInfo { public: x86IndexReg Base; // base register (no scale) @@ -255,7 +264,7 @@ namespace x86Emitter s32 Displacement; // address displacement public: - x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) : + __forceinline x86AddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -263,7 +272,7 @@ namespace x86Emitter { } - explicit x86ModRm( x86IndexReg base, int displacement=0 ) : + __forceinline explicit x86AddressInfo( const x86IndexReg& base, int displacement=0 ) : Base( base ), Index(), Factor(0), @@ -271,7 +280,7 @@ namespace x86Emitter { } - explicit x86ModRm( s32 displacement ) : + __forceinline explicit x86AddressInfo( s32 displacement ) : Base(), Index(), Factor(0), @@ -279,62 +288,87 @@ namespace x86Emitter { } - static x86ModRm FromIndexReg( 
x86IndexReg index, int scale=0, s32 displacement=0 ); + static x86AddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } - x86IndexReg GetEitherReg() const; - x86ModRm& Add( s32 imm ) + __forceinline x86AddressInfo& Add( s32 imm ) { Displacement += imm; return *this; } - x86ModRm& Add( const x86IndexReg& src ); - x86ModRm& Add( const x86ModRm& src ); + __forceinline x86AddressInfo& Add( const x86IndexReg& src ); + __forceinline x86AddressInfo& Add( const x86AddressInfo& src ); - x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); } - x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } - x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } - x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } + __forceinline x86AddressInfo operator+( const x86IndexReg& right ) const { return x86AddressInfo( *this ).Add( right ); } + __forceinline x86AddressInfo operator+( const x86AddressInfo& right ) const { return x86AddressInfo( *this ).Add( right ); } + __forceinline x86AddressInfo operator+( s32 imm ) const { return x86AddressInfo( *this ).Add( imm ); } + __forceinline x86AddressInfo operator-( s32 imm ) const { return x86AddressInfo( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// // ModSib - Internal low-level representation of the ModRM/SIB information. // - // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that - // the Base, Index, Scale, and Displacement values are all valid, and it serves as a type- - // safe layer between the x86Register's operators (which generate x86ModRm types) and the - // emitter's ModSib instruction forms. 
Without this, the x86Register would pass as a - // ModSib type implicitly, and that would cause ambiguity on a number of instructions. + // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means + // that the Base, Index, Scale, and Displacement values are all in the correct arrange- + // ments, and it serves as a type-safe layer between the x86Register's operators (which + // generate x86AddressInfo types) and the emitter's ModSib instruction forms. Without this, + // the x86Register would pass as a ModSib type implicitly, and that would cause ambiguity + // on a number of instructions. // - class ModSib + // End users should always use x86AddressInfo instead. + // + class ModSibBase { public: x86IndexReg Base; // base register (no scale) x86IndexReg Index; // index reg gets multiplied by the scale - int Scale; // scale applied to the index register, in scale/shift form + uint Scale; // scale applied to the index register, in scale/shift form s32 Displacement; // offset applied to the Base/Index registers. 
- explicit ModSib( const x86ModRm& src ); - explicit ModSib( s32 disp ); - ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); + public: + explicit ModSibBase( const x86AddressInfo& src ); + explicit ModSibBase( s32 disp ); + ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); - x86IndexReg GetEitherReg() const; bool IsByteSizeDisp() const { return is_s8( Displacement ); } - ModSib& Add( s32 imm ) + __forceinline ModSibBase& Add( s32 imm ) { Displacement += imm; return *this; } - ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); } - ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); } + __forceinline ModSibBase operator+( const s32 imm ) const { return ModSibBase( *this ).Add( imm ); } + __forceinline ModSibBase operator-( const s32 imm ) const { return ModSibBase( *this ).Add( -imm ); } protected: - void Reduce(); + __forceinline void Reduce(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Strictly-typed version of ModSibBase, which is used to apply operand size information + // to ImmToMem operations. 
+ // + template< int OperandSize > + class ModSibStrict : public ModSibBase + { + public: + __forceinline explicit ModSibStrict( const x86AddressInfo& src ) : ModSibBase( src ) {} + __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} + __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : + ModSibBase( base, index, scale, displacement ) {} + + __forceinline ModSibStrict& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } + __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -344,33 +378,390 @@ namespace x86Emitter { // passthrough instruction, allows ModSib to pass silently through ptr translation // without doing anything and without compiler error. - const ModSib& operator[]( const ModSib& src ) const { return src; } + const ModSibBase& operator[]( const ModSibBase& src ) const { return src; } - ModSib operator[]( x86IndexReg src ) const + __forceinline ModSibBase operator[]( x86IndexReg src ) const { - return ModSib( src, x86IndexReg::Empty ); + return ModSibBase( src, x86IndexReg::Empty ); } - ModSib operator[]( const x86ModRm& src ) const + __forceinline ModSibBase operator[]( const x86AddressInfo& src ) const { - return ModSib( src ); + return ModSibBase( src ); } - ModSib operator[]( uptr src ) const + __forceinline ModSibBase operator[]( uptr src ) const { - return ModSib( src ); + return ModSibBase( src ); } - ModSib operator[]( void* src ) const + __forceinline ModSibBase operator[]( const void* src ) const { - return ModSib( (uptr)src ); + return ModSibBase( (uptr)src ); } x86IndexerType() {} }; - // ------------------------------------------------------------------------ + 
////////////////////////////////////////////////////////////////////////////////////////// + // Explicit version of ptr[], in the form of ptr32[], ptr16[], etc. which allows + // specification of the operand size for ImmToMem operations. + // + template< int OperandSize > + struct x86IndexerTypeExplicit + { + // passthrough instruction, allows ModSib to pass silently through ptr translation + // without doing anything and without compiler error. + const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } + + __forceinline ModSibStrict operator[]( x86IndexReg src ) const + { + return ModSibStrict( src, x86IndexReg::Empty ); + } + + __forceinline ModSibStrict operator[]( const x86AddressInfo& src ) const + { + return ModSibStrict( src ); + } + + __forceinline ModSibStrict operator[]( uptr src ) const + { + return ModSibStrict( src ); + } + + __forceinline ModSibStrict operator[]( const void* src ) const + { + return ModSibStrict( (uptr)src ); + } + }; + extern const x86IndexerType ptr; + extern const x86IndexerTypeExplicit<4> ptr32; + extern const x86IndexerTypeExplicit<2> ptr16; + extern const x86IndexerTypeExplicit<1> ptr8; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + namespace Internal + { + extern void ModRM( uint mod, uint reg, uint rm ); + extern void SibSB( u32 ss, u32 index, u32 base ); + extern void EmitSibMagic( uint regfield, const ModSibBase& info ); + + struct SibMagic + { + static void Emit( uint regfield, const ModSibBase& info ) + { + EmitSibMagic( regfield, info ); + } + }; + + struct SibMagicInline + { + static __forceinline void Emit( uint regfield, const ModSibBase& info ) + { + EmitSibMagic( regfield, info ); + } + }; + + + enum G1Type + { + G1Type_ADD=0, + G1Type_OR, + G1Type_ADC, + G1Type_SBB, + G1Type_AND, + G1Type_SUB, + G1Type_XOR, + G1Type_CMP + }; + + enum G2Type + { + G2Type_ROL=0, + G2Type_ROR, + G2Type_RCL, + G2Type_RCR, + G2Type_SHL, + G2Type_SHR, + 
G2Type_Unused, + G2Type_SAR + }; + + // ------------------------------------------------------------------- + template< typename ImmType, G1Type InstType, typename SibMagicType > + class Group1Impl // Encoders for the Group 1 ALU instruction selected by InstType (see G1Type); operand size is sizeof(ImmType). + { + public: + static const uint OperandSize = sizeof(ImmType); + + protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } // writes the 0x66 prefix for 16-bit operands only + + public: + static __emitinline void Emit( const x86Register& to, const x86Register& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const ModSibBase& sibdest, const x86Register& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + SibMagicType::Emit( from.Id, sibdest ); + } + + static __emitinline void Emit( const x86Register& to, const ModSibBase& sibsrc ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + SibMagicType::Emit( to.Id, sibsrc ); + } + + static __emitinline void Emit( const x86Register& to, ImmType imm ) + { + prefix16(); // required by every 16-bit form, including the sign-extended imm8 encoding (0x83) below + if( !Is8BitOperand() && is_s8( imm ) ) + { + iWrite( 0x83 ); + ModRM( 3, InstType, to.Id ); + iWrite( imm ); + } + else + { + if( to.IsAccumulator() ) + iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + else + { + iWrite( Is8BitOperand() ? 0x80 : 0x81 ); + ModRM( 3, InstType, to.Id ); + } + iWrite( imm ); + } + } + + static __emitinline void Emit( const ModSibStrict& sibdest, ImmType imm ) + { + if( Is8BitOperand() ) + { + iWrite( 0x80 ); + SibMagicType::Emit( InstType, sibdest ); + iWrite( imm ); + } + else + { + prefix16(); + iWrite( is_s8( imm ) ? 0x83 : 0x81 ); + SibMagicType::Emit( InstType, sibdest ); + if( is_s8( imm ) ) + iWrite( imm ); + else + iWrite( imm ); + } + } + }; + + // ------------------------------------------------------------------- + // Group 2 (shift) instructions have no Sib/ModRM forms.
+ // Note: For Imm forms, we ignore the instruction if the shift count is zero. This + // is a safe optimization since any zero-value shift does not affect any flags. + // + template< typename ImmType, G2Type InstType, typename SibMagicType > + class Group2Impl // Encoders for the Group 2 shift/rotate instruction selected by InstType (see G2Type); operand size is sizeof(ImmType). + { + public: + static const uint OperandSize = sizeof(ImmType); + + protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } // writes the 0x66 prefix for 16-bit operands only + + public: + static __emitinline void Emit( const x86Register& to, const x86Register8& from ) + { + jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) + + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + ModRM( 3, InstType, to.Id ); + } + + static __emitinline void Emit( const x86Register& to, u8 imm ) + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + ModRM( 3, InstType, to.Id ); + } + else + { + iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + ModRM( 3, InstType, to.Id ); + iWrite( imm ); + } + } + + static __emitinline void Emit( const ModSibStrict& sibdest, const x86Register8& from ) + { + jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) + + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + SibMagicType::Emit( InstType, sibdest ); // reg field selects the operation (InstType), same as the register form above; the count register is implicit + } + + static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + SibMagicType::Emit( InstType, sibdest ); + } + else + { + iWrite( Is8BitOperand() ?
0xc0 : 0xc1 ); + SibMagicType::Emit( InstType, sibdest ); + iWrite( imm ); + } + } + }; + + // ------------------------------------------------------------------- + // + template< G1Type InstType > + class Group1ImplAll + { + protected: + typedef Group1Impl m_32; + typedef Group1Impl m_16; + typedef Group1Impl m_8; + + typedef Group1Impl m_32i; + typedef Group1Impl m_16i; + typedef Group1Impl m_8i; + + // Inlining Notes: + // I've set up the inlining to be as practical and intelligent as possible, which means + // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to + // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- + // cretion of the compiler. + // + + // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) + + public: + // ---------- 32 Bit Interface ----------- + __forceinline void operator()( const x86Register32& to, const x86Register32& from ) const { m_32i::Emit( to, from ); } + __forceinline void operator()( const x86Register32& to, const void* src ) const { m_32i::Emit( to, ptr32[src] ); } + __forceinline void operator()( const void* dest, const x86Register32& from ) const { m_32i::Emit( ptr32[dest], from ); } + __noinline void operator()( const ModSibBase& sibdest, const x86Register32& from ) const { m_32::Emit( sibdest, from ); } + __noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } + + void operator()( const x86Register32& to, u32 imm ) const { m_32i::Emit( to, imm ); } + + // ---------- 16 Bit Interface ----------- + __forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); } + __forceinline void operator()( const x86Register16& to, const void* src ) const { m_16i::Emit( to, ptr16[src] ); } + __forceinline void 
operator()( const void* dest, const x86Register16& from ) const { m_16i::Emit( ptr16[dest], from ); } + __noinline void operator()( const ModSibBase& sibdest, const x86Register16& from ) const { m_16::Emit( sibdest, from ); } + __noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } + + void operator()( const x86Register16& to, u16 imm ) const { m_16i::Emit( to, imm ); } + + // ---------- 8 Bit Interface ----------- + __forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); } + __forceinline void operator()( const x86Register8& to, const void* src ) const { m_8i::Emit( to, ptr8[src] ); } + __forceinline void operator()( const void* dest, const x86Register8& from ) const { m_8i::Emit( ptr8[dest], from ); } + __noinline void operator()( const ModSibBase& sibdest, const x86Register8& from ) const { m_8::Emit( sibdest, from ); } + __noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } + + void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } + }; + + + // ------------------------------------------------------------------- + // + template< G2Type InstType > + class Group2ImplAll + { + protected: + typedef Group2Impl m_32; + typedef Group2Impl m_16; + typedef Group2Impl m_8; + + typedef Group2Impl m_32i; + typedef Group2Impl m_16i; + typedef Group2Impl m_8i; + + // Inlining Notes: + // I've set up the inlining to be as practical and intelligent as possible, which means + // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to + // virtually no code. 
In the case of (Reg, Imm) forms, the inlining is up to the dis- + // cretion of the compiler. + // + + // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) + + public: + // ---------- 32 Bit Interface ----------- + __forceinline void operator()( const x86Register32& to, const x86Register8& from ) const{ m_32i::Emit( to, from ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, const x86Register8& from ) const{ m_32::Emit( sibdest, from ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); } + void operator()( const x86Register32& to, u8 imm ) const { m_32i::Emit( to, imm ); } + + // ---------- 16 Bit Interface ----------- + __forceinline void operator()( const x86Register16& to, const x86Register8& from ) const{ m_16i::Emit( to, from ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, const x86Register8& from ) const{ m_16::Emit( sibdest, from ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); } + void operator()( const x86Register16& to, u8 imm ) const { m_16i::Emit( to, imm ); } + + // ---------- 8 Bit Interface ----------- + __forceinline void operator()( const x86Register8& to, const x86Register8& from ) const{ m_8i::Emit( to, from ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ m_8::Emit( sibdest, from ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } + void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } + }; + + // Define the externals for Group1/2 instructions here (inside the Internal namespace). + // and then import then into the x86Emitter namespace later. 
Done because it saves a + // lot of Internal:: namespace resolution mess, and is better than the alternative of + // importing Internal into x86Emitter, which done at the header file level would defeat + // the purpose!) + + extern const Group1ImplAll ADD; + extern const Group1ImplAll OR; + extern const Group1ImplAll ADC; + extern const Group1ImplAll SBB; + extern const Group1ImplAll AND; + extern const Group1ImplAll SUB; + extern const Group1ImplAll XOR; + extern const Group1ImplAll CMP; + + extern const Group2ImplAll ROL; + extern const Group2ImplAll ROR; + extern const Group2ImplAll RCL; + extern const Group2ImplAll RCR; + extern const Group2ImplAll SHL; + extern const Group2ImplAll SHR; + extern const Group2ImplAll SAR; + } + + // ------------------------------------------------------------------------ extern const x86Register32 eax; extern const x86Register32 ebx; @@ -398,4 +789,6 @@ namespace x86Emitter extern const x86Register8 ch; extern const x86Register8 dh; extern const x86Register8 bh; -} \ No newline at end of file +} + +#include "ix86_inlines.inl"