2009-02-09 21:15:56 +00:00
|
|
|
/* Pcsx2 - Pc Ps2 Emulator
|
2009-02-15 23:23:46 +00:00
|
|
|
* Copyright (C) 2002-2009 Pcsx2 Team
|
2009-02-09 21:15:56 +00:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
|
|
*/
|
2009-04-14 01:26:57 +00:00
|
|
|
|
2009-02-09 21:15:56 +00:00
|
|
|
/*
|
2009-04-14 01:26:57 +00:00
|
|
|
* ix86 core v0.9.0
|
|
|
|
*
|
|
|
|
* Original Authors (v0.6.2 and prior):
|
|
|
|
* linuzappz <linuzappz@pcsx.net>
|
|
|
|
* alexey silinov
|
|
|
|
* goldfinger
|
|
|
|
* zerofrog(@gmail.com)
|
|
|
|
*
|
|
|
|
* Authors of v0.9.0:
|
|
|
|
* Jake.Stine(@gmail.com)
|
|
|
|
* cottonvibes(@gmail.com)
|
|
|
|
* sudonim(1@gmail.com)
|
2009-02-09 21:15:56 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "PrecompiledHeader.h"
|
2009-03-01 20:44:48 +00:00
|
|
|
|
2009-02-09 21:15:56 +00:00
|
|
|
#include "System.h"
|
2009-04-07 21:54:50 +00:00
|
|
|
#include "ix86_internal.h"
|
2009-02-09 21:15:56 +00:00
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Notes on Thread Local Storage:
|
|
|
|
// * TLS is pretty simple, and "just works" from a programmer perspective, with only
|
|
|
|
// some minor additional computational overhead (see performance notes below).
|
|
|
|
//
|
|
|
|
// * MSVC and GCC handle TLS differently internally, but behavior to the programmer is
|
|
|
|
// generally identical.
|
|
|
|
//
|
|
|
|
// Performance Considerations:
|
|
|
|
// * GCC's implementation involves an extra dereference from normal storage.
|
|
|
|
//
|
|
|
|
// * MSVC's implementation involves *two* extra dereferences from normal storage because
|
|
|
|
// it has to look up the TLS heap pointer from the Windows Thread Storage Area. (in
|
|
|
|
// generated ASM code, this dereference is denoted by access to the fs:[2ch] address).
|
|
|
|
//
|
|
|
|
// * However, in either case, the optimizer usually optimizes it to a register so the
|
|
|
|
// extra overhead is minimal over a series of instructions. (Note!! the Full Opt-
|
|
|
|
// imization [/Ox] option effectively disables TLS optimizations in MSVC, causing
|
|
|
|
// generally significant code bloat).
|
|
|
|
//
|
|
|
|
|
|
|
|
|
2009-04-07 16:54:02 +00:00
|
|
|
// Per-thread emitter state.  x86Ptr is the write cursor that iSetPtr / iGetPtr /
// iAlignPtr / iAdvancePtr (below) read and update.
__threadlocal u8*	x86Ptr;

// NOTE(review): j8Ptr / j32Ptr are not used in the visible part of this file; presumably
// they are the label slots of the legacy 8-bit / 32-bit jump helpers -- confirm.
__threadlocal u8*	j8Ptr[32];
__threadlocal u32*	j32Ptr[32];

// Only element [0] is explicitly initialized to XMMT_INT; the remaining elements are
// value-initialized by the aggregate initializer.
__threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT };
|
2009-04-07 08:42:25 +00:00
|
|
|
|
2009-04-07 21:54:50 +00:00
|
|
|
namespace x86Emitter {
|
|
|
|
|
2009-04-19 02:14:50 +00:00
|
|
|
const iAddressIndexerBase ptr;
|
2009-04-19 05:24:20 +00:00
|
|
|
const iAddressIndexer<u128> ptr128;
|
|
|
|
const iAddressIndexer<u64> ptr64;
|
2009-04-19 02:14:50 +00:00
|
|
|
const iAddressIndexer<u32> ptr32;
|
|
|
|
const iAddressIndexer<u16> ptr16;
|
|
|
|
const iAddressIndexer<u8> ptr8;
|
2009-04-07 21:54:50 +00:00
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
// ------------------------------------------------------------------------
|
2009-04-14 12:37:48 +00:00
|
|
|
|
2009-04-16 22:38:55 +00:00
|
|
|
template< typename OperandType > const iRegister<OperandType> iRegister<OperandType>::Empty;
|
2009-04-19 05:24:20 +00:00
|
|
|
const iAddressReg iAddressReg::Empty;
|
|
|
|
|
|
|
|
// SSE register constants.  The constructor argument is the register number that ends up
// in the ModRM/SIB fields (emitters below read it back through '.Id').
const iRegisterSSE xmm0( 0 );
const iRegisterSSE xmm1( 1 );
const iRegisterSSE xmm2( 2 );
const iRegisterSSE xmm3( 3 );
const iRegisterSSE xmm4( 4 );
const iRegisterSSE xmm5( 5 );
const iRegisterSSE xmm6( 6 );
const iRegisterSSE xmm7( 7 );
|
|
|
|
|
|
|
|
// MMX register constants, numbered 0 through 7.
const iRegisterMMX mm0( 0 );
const iRegisterMMX mm1( 1 );
const iRegisterMMX mm2( 2 );
const iRegisterMMX mm3( 3 );
const iRegisterMMX mm4( 4 );
const iRegisterMMX mm5( 5 );
const iRegisterMMX mm6( 6 );
const iRegisterMMX mm7( 7 );
|
2009-04-14 01:26:57 +00:00
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
// 32-bit general purpose register constants.  The ids are not alphabetical:
// eax=0, ecx=1, edx=2, ebx=3, esp=4, ebp=5, esi=6, edi=7.
const iRegister32 eax( 0 );
const iRegister32 ebx( 3 );
const iRegister32 ecx( 1 );
const iRegister32 edx( 2 );
const iRegister32 esi( 6 );
const iRegister32 edi( 7 );
const iRegister32 ebp( 5 );
const iRegister32 esp( 4 );
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
// 16-bit general purpose register constants; same id assignment as the 32-bit set.
const iRegister16 ax( 0 );
const iRegister16 bx( 3 );
const iRegister16 cx( 1 );
const iRegister16 dx( 2 );
const iRegister16 si( 6 );
const iRegister16 di( 7 );
const iRegister16 bp( 5 );
const iRegister16 sp( 4 );
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
const iRegister8
|
2009-04-16 01:34:09 +00:00
|
|
|
al( 0 ),
|
2009-04-14 01:26:57 +00:00
|
|
|
dl( 2 ), bl( 3 ),
|
|
|
|
ah( 4 ), ch( 5 ),
|
|
|
|
dh( 6 ), bh( 7 );
|
2009-04-16 01:34:09 +00:00
|
|
|
|
|
|
|
const iRegisterCL cl;
|
2009-04-14 01:26:57 +00:00
|
|
|
|
|
|
|
namespace Internal
|
2009-04-07 08:42:25 +00:00
|
|
|
{
|
2009-04-14 01:26:57 +00:00
|
|
|
// Performance note: VC++ wants to use byte/word register form for the following
|
2009-04-15 21:00:32 +00:00
|
|
|
// ModRM/SibSB constructors when we use iWrite<u8>, and furthermore unrolls the
|
2009-04-14 01:26:57 +00:00
|
|
|
// shift using a series of ADDs, with the following results:
|
|
|
|
// add cl,cl
|
|
|
|
// add cl,cl
|
|
|
|
// add cl,cl
|
|
|
|
// or cl,bl
|
|
|
|
// add cl,cl
|
|
|
|
// ... etc.
|
|
|
|
//
|
|
|
|
// This is unquestionably bad optimization by Core2 standards, and generates tons of
|
|
|
|
// register aliases and false dependencies. (although may have been ideal for early-
|
|
|
|
// brand P4s with a broken barrel shifter?). The workaround is to do our own manual
|
|
|
|
// x86Ptr access and update using a u32 instead of u8. Thanks to little endianness,
|
2009-04-15 21:00:32 +00:00
|
|
|
// the same end result is achieved and no false dependencies are generated. The draw-
|
|
|
|
// back is that it clobbers 3 bytes past the end of the write, which could cause a
|
|
|
|
// headache for someone who himself is doing some kind of headache-inducing amount of
|
|
|
|
// recompiler SMC. So we don't do a work-around, and just hope for the compiler to
|
|
|
|
// stop sucking someday instead. :)
|
2009-04-14 01:26:57 +00:00
|
|
|
//
|
|
|
|
// (btw, I know this isn't a critical performance item by any means, but it's
|
|
|
|
// annoying simply because it *should* be an easy thing to optimize)
|
|
|
|
|
|
|
|
__forceinline void ModRM( uint mod, uint reg, uint rm )
{
	// Pack the fields as mod[7:6] | reg[5:3] | rm[2:0] and emit them as a single byte.
	// The u32-store alternative described in the performance note above stays disabled,
	// since it would clobber the three bytes following the write.
	const uint packed = (mod << 6) | (reg << 3) | rm;
	iWrite<u8>( packed );
}
|
2009-04-07 08:42:25 +00:00
|
|
|
|
2009-04-16 14:45:13 +00:00
|
|
|
// Emits a ModRM byte whose 'mod' field is Mod_Direct; reg and rm are passed through
// to ModRM() unchanged.
__forceinline void ModRM_Direct( uint reg, uint rm ) { ModRM( Mod_Direct, reg, rm ); }
|
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
__forceinline void SibSB( u32 ss, u32 index, u32 base )
{
	// Same layout as ModRM: ss[7:6] | index[5:3] | base[2:0], written as one byte.
	// (The u32-store variant is intentionally not used; see the note above ModRM.)
	const u32 packed = (ss << 6) | (index << 3) | base;
	iWrite<u8>( packed );
}
|
|
|
|
|
|
|
|
// Emits a displacement-only memory operand: a ModRM byte with mod=0 and
// rm=ModRm_UseDisp32, followed by the 32-bit displacement.
//   regfield     - value placed in the ModRM reg field.
//   displacement - signed 32-bit displacement written after the ModRM byte.
__forceinline void iWriteDisp( int regfield, s32 displacement )
{
	const uint mod_none = 0;
	ModRM( mod_none, regfield, ModRm_UseDisp32 );
	iWrite<s32>( displacement );
}
|
|
|
|
|
|
|
|
// Pointer convenience overload: the address is cast to s32 and forwarded to the
// s32 overload.
// NOTE(review): the cast only preserves the address where pointers fit in 32 bits.
__forceinline void iWriteDisp( int regfield, const void* address )
{
	const s32 as_disp = (s32)address;
	iWriteDisp( regfield, as_disp );
}
|
2009-04-07 08:42:25 +00:00
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the
|
|
|
|
// instruction ca be encoded as ModRm alone.
|
|
|
|
static __forceinline bool NeedsSibMagic( const ModSibBase& info )
|
|
|
|
{
|
|
|
|
// no registers? no sibs!
|
2009-04-15 15:45:52 +00:00
|
|
|
// (ModSibBase::Reduce always places a register in Index, and optionally leaves
|
|
|
|
// Base empty if only register is specified)
|
2009-04-14 01:26:57 +00:00
|
|
|
if( info.Index.IsEmpty() ) return false;
|
|
|
|
|
|
|
|
// A scaled register needs a SIB
|
|
|
|
if( info.Scale != 0 ) return true;
|
|
|
|
|
|
|
|
// two registers needs a SIB
|
|
|
|
if( !info.Base.IsEmpty() ) return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Conditionally generates Sib encoding information!
|
|
|
|
//
|
|
|
|
// regfield - register field to be written to the ModRm. This is either a register specifier
|
|
|
|
// or an opcode extension. In either case, the instruction determines the value for us.
|
|
|
|
//
|
2009-04-15 21:00:32 +00:00
|
|
|
void EmitSibMagic( uint regfield, const ModSibBase& info )
{
	jASSUME( regfield < 8 );

	// 0 = no displacement, 1 = 8-bit, 2 = 32-bit.  The same value is passed as the
	// 'mod' field of the ModRM byte below.
	int disp_kind = 0;
	if( info.Displacement != 0 )
		disp_kind = info.IsByteSizeDisp() ? 1 : 2;

	if( NeedsSibMagic( info ) )
	{
		if( info.Base.IsEmpty() )
		{
			// "Just" index*scale has no direct encoding; it is written as
			// [index*scale + disp32] by putting ModRm_UseDisp32 in the SIB base slot,
			// and the displacement is then always a full 32 bits.
			ModRM( 0, regfield, ModRm_UseSib );
			SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 );
			iWrite<s32>( info.Displacement );
			return;
		}

		if( info.Base == ebp && disp_kind == 0 )
			disp_kind = 1;		// forces [ebp] to be encoded as [ebp+0]!

		ModRM( disp_kind, regfield, ModRm_UseSib );
		SibSB( info.Scale, info.Index.Id, info.Base.Id );
	}
	else
	{
		// ModRM-only encoding.  With no register at all, use the Disp32 form, which is
		// encoded as "EBP w/o displacement" -- which is also why a bare [ebp] must be
		// given an explicit displacement of zero.
		if( info.Index.IsEmpty() )
		{
			iWriteDisp( regfield, info.Displacement );
			return;
		}

		if( info.Index == ebp && disp_kind == 0 )
			disp_kind = 1;		// forces [ebp] to be encoded as [ebp+0]!

		ModRM( disp_kind, regfield, info.Index.Id );
	}

	// Trailing displacement, if the chosen form carries one.
	switch( disp_kind )
	{
		case 0:
			break;

		case 1:
			iWrite<s8>( info.Displacement );
			break;

		default:
			iWrite<s32>( info.Displacement );
			break;
	}
}
|
2009-04-08 06:25:40 +00:00
|
|
|
}
|
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
using namespace Internal;
|
|
|
|
|
2009-04-16 22:38:55 +00:00
|
|
|
const MovImplAll iMOV;
|
2009-04-17 18:47:04 +00:00
|
|
|
const TestImplAll iTEST;
|
2009-04-16 22:38:55 +00:00
|
|
|
|
2009-04-20 00:06:51 +00:00
|
|
|
const G1LogicImpl<G1Type_AND,0x54> iAND;
|
|
|
|
const G1LogicImpl<G1Type_OR,0x56> iOR;
|
|
|
|
const G1LogicImpl<G1Type_XOR,0x57> iXOR;
|
|
|
|
|
|
|
|
const G1ArithmeticImpl<G1Type_ADD,0x58> iADD;
|
|
|
|
const G1ArithmeticImpl<G1Type_SUB,0x5c> iSUB;
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
const Group1ImplAll<G1Type_ADC> iADC;
|
|
|
|
const Group1ImplAll<G1Type_SBB> iSBB;
|
|
|
|
const Group1ImplAll<G1Type_CMP> iCMP;
|
|
|
|
|
|
|
|
// Emitter objects for the Group 2 (rotate / shift) instructions.
const Group2ImplAll<G2Type_ROL>		iROL;
const Group2ImplAll<G2Type_ROR>		iROR;
const Group2ImplAll<G2Type_RCL>		iRCL;
const Group2ImplAll<G2Type_RCR>		iRCR;
const Group2ImplAll<G2Type_SHL>		iSHL;
const Group2ImplAll<G2Type_SHR>		iSHR;
const Group2ImplAll<G2Type_SAR>		iSAR;
|
|
|
|
|
2009-04-16 22:38:55 +00:00
|
|
|
const Group3ImplAll<G3Type_NOT> iNOT;
|
|
|
|
const Group3ImplAll<G3Type_NEG> iNEG;
|
|
|
|
const Group3ImplAll<G3Type_MUL> iUMUL;
|
|
|
|
const Group3ImplAll<G3Type_DIV> iUDIV;
|
2009-04-20 00:06:51 +00:00
|
|
|
const G3Impl_PlusSSE<G3Type_iDIV,0x5e> iDIV;
|
|
|
|
const iMul_PlusSSE iMUL;
|
2009-04-16 22:38:55 +00:00
|
|
|
|
|
|
|
const IncDecImplAll<false> iINC;
|
|
|
|
const IncDecImplAll<true> iDEC;
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
const MovExtendImplAll<false> iMOVZX;
|
2009-04-16 01:34:09 +00:00
|
|
|
const MovExtendImplAll<true> iMOVSX;
|
|
|
|
|
2009-04-17 18:47:04 +00:00
|
|
|
const DwordShiftImplAll<false> iSHLD;
|
|
|
|
const DwordShiftImplAll<true> iSHRD;
|
2009-04-15 21:00:32 +00:00
|
|
|
|
2009-04-17 18:47:04 +00:00
|
|
|
const Group8ImplAll<G8Type_BT> iBT;
|
|
|
|
const Group8ImplAll<G8Type_BTR> iBTR;
|
|
|
|
const Group8ImplAll<G8Type_BTS> iBTS;
|
|
|
|
const Group8ImplAll<G8Type_BTC> iBTC;
|
|
|
|
|
2009-04-19 02:14:50 +00:00
|
|
|
const BitScanImplAll<false> iBSF;
|
|
|
|
const BitScanImplAll<true> iBSR;
|
|
|
|
|
2009-04-17 18:47:04 +00:00
|
|
|
// ------------------------------------------------------------------------
|
2009-04-15 21:00:32 +00:00
|
|
|
const CMovImplGeneric iCMOV;
|
|
|
|
|
|
|
|
const CMovImplAll<Jcc_Above> iCMOVA;
|
|
|
|
const CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
|
|
|
|
const CMovImplAll<Jcc_Below> iCMOVB;
|
|
|
|
const CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
|
|
|
|
|
|
|
|
const CMovImplAll<Jcc_Greater> iCMOVG;
|
|
|
|
const CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
|
|
|
|
const CMovImplAll<Jcc_Less> iCMOVL;
|
|
|
|
const CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
|
|
|
|
|
|
|
|
const CMovImplAll<Jcc_Zero> iCMOVZ;
|
|
|
|
const CMovImplAll<Jcc_Equal> iCMOVE;
|
|
|
|
const CMovImplAll<Jcc_NotZero> iCMOVNZ;
|
|
|
|
const CMovImplAll<Jcc_NotEqual> iCMOVNE;
|
|
|
|
|
|
|
|
const CMovImplAll<Jcc_Overflow> iCMOVO;
|
|
|
|
const CMovImplAll<Jcc_NotOverflow> iCMOVNO;
|
|
|
|
const CMovImplAll<Jcc_Carry> iCMOVC;
|
|
|
|
const CMovImplAll<Jcc_NotCarry> iCMOVNC;
|
|
|
|
|
|
|
|
const CMovImplAll<Jcc_Signed> iCMOVS;
|
|
|
|
const CMovImplAll<Jcc_Unsigned> iCMOVNS;
|
|
|
|
const CMovImplAll<Jcc_ParityEven> iCMOVPE;
|
|
|
|
const CMovImplAll<Jcc_ParityOdd> iCMOVPO;
|
|
|
|
|
2009-04-17 18:47:04 +00:00
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
const SetImplGeneric iSET;
|
|
|
|
|
|
|
|
const SetImplAll<Jcc_Above> iSETA;
|
|
|
|
const SetImplAll<Jcc_AboveOrEqual> iSETAE;
|
|
|
|
const SetImplAll<Jcc_Below> iSETB;
|
|
|
|
const SetImplAll<Jcc_BelowOrEqual> iSETBE;
|
|
|
|
|
|
|
|
const SetImplAll<Jcc_Greater> iSETG;
|
|
|
|
const SetImplAll<Jcc_GreaterOrEqual> iSETGE;
|
|
|
|
const SetImplAll<Jcc_Less> iSETL;
|
|
|
|
const SetImplAll<Jcc_LessOrEqual> iSETLE;
|
|
|
|
|
|
|
|
const SetImplAll<Jcc_Zero> iSETZ;
|
|
|
|
const SetImplAll<Jcc_Equal> iSETE;
|
|
|
|
const SetImplAll<Jcc_NotZero> iSETNZ;
|
|
|
|
const SetImplAll<Jcc_NotEqual> iSETNE;
|
|
|
|
|
|
|
|
const SetImplAll<Jcc_Overflow> iSETO;
|
|
|
|
const SetImplAll<Jcc_NotOverflow> iSETNO;
|
|
|
|
const SetImplAll<Jcc_Carry> iSETC;
|
|
|
|
const SetImplAll<Jcc_NotCarry> iSETNC;
|
|
|
|
|
|
|
|
const SetImplAll<Jcc_Signed> iSETS;
|
|
|
|
const SetImplAll<Jcc_Unsigned> iSETNS;
|
|
|
|
const SetImplAll<Jcc_ParityEven> iSETPE;
|
|
|
|
const SetImplAll<Jcc_ParityOdd> iSETPO;
|
|
|
|
|
|
|
|
|
2009-04-15 15:45:52 +00:00
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Assigns the current emitter buffer target address.
|
|
|
|
// This is provided instead of using x86Ptr directly, since we may in the future find
|
|
|
|
// a need to change the storage class system for the x86Ptr 'under the hood.'
|
|
|
|
__emitinline void iSetPtr( void* ptr )
{
	// Re-target this thread's emitter cursor at the caller's buffer.
	x86Ptr = static_cast<u8*>( ptr );
}
|
|
|
|
|
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Retrieves the current emitter buffer target address.
|
|
|
|
// This is provided instead of using x86Ptr directly, since we may in the future find
|
|
|
|
// a need to change the storage class system for the x86Ptr 'under the hood.'
|
|
|
|
__emitinline u8* iGetPtr()
{
	// Snapshot of this thread's emitter cursor.
	u8* const cursor = x86Ptr;
	return cursor;
}
|
|
|
|
|
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Rounds the emitter cursor (x86Ptr) up to the next multiple of 'bytes'.
//   bytes - alignment in bytes; the mask arithmetic only yields a correct result when
//           this is a power of two.
__emitinline void iAlignPtr( uint bytes )
{
	// Build the mask at pointer width.  Computing ~(bytes - 1) in 'uint' would
	// zero-extend when combined with a wider uptr and clear the upper pointer bits.
	const uptr align_mask = (uptr)bytes - 1;

	// forward align
	x86Ptr = (u8*)( ( (uptr)x86Ptr + align_mask ) & ~align_mask );
}
|
|
|
|
|
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
__emitinline void iAdvancePtr( uint bytes )
{
	if( !IsDevBuild )
	{
		// Non-dev builds just skip over the bytes without touching them.
		x86Ptr += bytes;
		return;
	}

	// Common debugger courtesy: dev builds fill the skipped region with INT3 (0xcc),
	// one iWrite<u8> per byte.
	uint remaining = bytes;
	while( remaining-- != 0 )
		iWrite<u8>( 0xcc );
}
|
|
|
|
|
2009-04-19 05:24:20 +00:00
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values.
|
|
|
|
// Necessary because by default ModSib compounds registers into Index when possible.
|
|
|
|
//
|
|
|
|
// If the ModSib is in illegal form ([Base + Index*5] for example) then an assertion
|
|
|
|
// followed by an InvalidParameter Exception will be tossed around in haphazard
|
|
|
|
// fashion.
|
|
|
|
//
|
|
|
|
// Optimization Note: Currently VC does a piss poor job of inlining this, even though
|
|
|
|
// constant propagation *should* resove it to little or no code (VC's constprop fails
|
|
|
|
// on C++ class initializers). There is a work around [using array initializers instead]
|
|
|
|
// but it's too much trouble for code that isn't performance critical anyway.
|
|
|
|
// And, with luck, maybe VC10 will optimize it better and make it a non-issue. :D
|
|
|
|
//
|
|
|
|
void ModSibBase::Reduce()
|
|
|
|
{
|
|
|
|
if( Index.IsStackPointer() )
|
|
|
|
{
|
|
|
|
// esp cannot be encoded as the index, so move it to the Base, if possible.
|
|
|
|
// note: intentionally leave index assigned to esp also (generates correct
|
|
|
|
// encoding later, since ESP cannot be encoded 'alone')
|
|
|
|
|
|
|
|
jASSUME( Scale == 0 ); // esp can't have an index modifier!
|
|
|
|
jASSUME( Base.IsEmpty() ); // base must be empty or else!
|
|
|
|
|
|
|
|
Base = Index;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If no index reg, then load the base register into the index slot.
|
|
|
|
if( Index.IsEmpty() )
|
|
|
|
{
|
|
|
|
Index = Base;
|
|
|
|
Scale = 0;
|
|
|
|
if( !Base.IsStackPointer() ) // prevent ESP from being encoded 'alone'
|
|
|
|
Base = iAddressReg::Empty;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// The Scale has a series of valid forms, all shown here:
|
|
|
|
|
|
|
|
switch( Scale )
|
|
|
|
{
|
|
|
|
case 0: break;
|
|
|
|
case 1: Scale = 0; break;
|
|
|
|
case 2: Scale = 1; break;
|
|
|
|
|
|
|
|
case 3: // becomes [reg*2+reg]
|
|
|
|
jASSUME( Base.IsEmpty() );
|
|
|
|
Base = Index;
|
|
|
|
Scale = 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 4: Scale = 2; break;
|
|
|
|
|
|
|
|
case 5: // becomes [reg*4+reg]
|
|
|
|
jASSUME( Base.IsEmpty() );
|
|
|
|
Base = Index;
|
|
|
|
Scale = 2;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 6: // invalid!
|
|
|
|
assert( false );
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 7: // so invalid!
|
|
|
|
assert( false );
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 8: Scale = 3; break;
|
|
|
|
case 9: // becomes [reg*8+reg]
|
|
|
|
jASSUME( Base.IsEmpty() );
|
|
|
|
Base = Index;
|
|
|
|
Scale = 3;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Internal implementation of EmitSibMagic which has been custom tailored
|
|
|
|
// to optimize special forms of the Lea instructions accordingly, such
|
|
|
|
// as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg".
|
|
|
|
//
|
2009-04-14 12:37:48 +00:00
|
|
|
// preserve_flags - set to ture to disable use of SHL on [Index*Base] forms
|
|
|
|
// of LEA, which alters flags states.
|
|
|
|
//
|
2009-04-16 22:38:55 +00:00
|
|
|
template< typename OperandType >
|
|
|
|
static void EmitLeaMagic( iRegister<OperandType> to, const ModSibBase& src, bool preserve_flags )
|
2009-04-08 06:25:40 +00:00
|
|
|
{
|
2009-04-16 22:38:55 +00:00
|
|
|
typedef iRegister<OperandType> ToReg;
|
|
|
|
|
2009-04-08 06:25:40 +00:00
|
|
|
int displacement_size = (src.Displacement == 0) ? 0 :
|
|
|
|
( ( src.IsByteSizeDisp() ) ? 1 : 2 );
|
|
|
|
|
|
|
|
// See EmitSibMagic for commenting on SIB encoding.
|
|
|
|
|
|
|
|
if( !NeedsSibMagic( src ) )
|
|
|
|
{
|
|
|
|
// LEA Land: means we have either 1-register encoding or just an offset.
|
|
|
|
// offset is encodable as an immediate MOV, and a register is encodable
|
|
|
|
// as a register MOV.
|
|
|
|
|
|
|
|
if( src.Index.IsEmpty() )
|
|
|
|
{
|
2009-04-15 15:45:52 +00:00
|
|
|
iMOV( to, src.Displacement );
|
2009-04-08 06:25:40 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
else if( displacement_size == 0 )
|
|
|
|
{
|
2009-04-15 15:45:52 +00:00
|
|
|
iMOV( to, ToReg( src.Index.Id ) );
|
2009-04-08 06:25:40 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2009-04-15 15:45:52 +00:00
|
|
|
if( !preserve_flags )
|
|
|
|
{
|
|
|
|
// encode as MOV and ADD combo. Make sure to use the immediate on the
|
|
|
|
// ADD since it can encode as an 8-bit sign-extended value.
|
|
|
|
|
|
|
|
iMOV( to, ToReg( src.Index.Id ) );
|
|
|
|
iADD( to, src.Displacement );
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// note: no need to do ebp+0 check since we encode all 0 displacements as
|
|
|
|
// register assignments above (via MOV)
|
2009-04-08 06:25:40 +00:00
|
|
|
|
2009-04-15 15:45:52 +00:00
|
|
|
iWrite<u8>( 0x8d );
|
|
|
|
ModRM( displacement_size, to.Id, src.Index.Id );
|
|
|
|
}
|
2009-04-08 06:25:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if( src.Base.IsEmpty() )
|
|
|
|
{
|
2009-04-14 12:37:48 +00:00
|
|
|
if( !preserve_flags && (displacement_size == 0) )
|
2009-04-08 06:25:40 +00:00
|
|
|
{
|
|
|
|
// Encode [Index*Scale] as a combination of Mov and Shl.
|
2009-04-14 01:26:57 +00:00
|
|
|
// This is more efficient because of the bloated LEA format which requires
|
2009-04-14 12:37:48 +00:00
|
|
|
// a 32 bit displacement, and the compact nature of the alternative.
|
2009-04-14 01:26:57 +00:00
|
|
|
//
|
|
|
|
// (this does not apply to older model P4s with the broken barrel shifter,
|
|
|
|
// but we currently aren't optimizing for that target anyway).
|
|
|
|
|
2009-04-15 15:45:52 +00:00
|
|
|
iMOV( to, ToReg( src.Index.Id ) );
|
|
|
|
iSHL( to, src.Scale );
|
2009-04-08 06:25:40 +00:00
|
|
|
return;
|
|
|
|
}
|
2009-04-14 01:26:57 +00:00
|
|
|
iWrite<u8>( 0x8d );
|
2009-04-08 06:25:40 +00:00
|
|
|
ModRM( 0, to.Id, ModRm_UseSib );
|
|
|
|
SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 );
|
2009-04-14 01:26:57 +00:00
|
|
|
iWrite<u32>( src.Displacement );
|
|
|
|
return;
|
2009-04-08 06:25:40 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2009-04-15 15:45:52 +00:00
|
|
|
if( src.Scale == 0 )
|
|
|
|
{
|
|
|
|
if( !preserve_flags )
|
|
|
|
{
|
|
|
|
if( src.Index == esp )
|
|
|
|
{
|
|
|
|
// ESP is not encodable as an index (ix86 ignores it), thus:
|
|
|
|
iMOV( to, ToReg( src.Base.Id ) ); // will do the trick!
|
2009-04-17 01:54:35 +00:00
|
|
|
if( src.Displacement ) iADD( to, src.Displacement );
|
2009-04-15 15:45:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
else if( src.Displacement == 0 )
|
|
|
|
{
|
|
|
|
iMOV( to, ToReg( src.Base.Id ) );
|
|
|
|
iADD( to, ToReg( src.Index.Id ) );
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if( (src.Index == esp) && (src.Displacement == 0) )
|
|
|
|
{
|
|
|
|
// special case handling of ESP as Index, which is replaceable with
|
|
|
|
// a single MOV even when preserve_flags is set! :D
|
|
|
|
|
|
|
|
iMOV( to, ToReg( src.Base.Id ) );
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-08 06:25:40 +00:00
|
|
|
if( src.Base == ebp && displacement_size == 0 )
|
|
|
|
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
|
|
|
|
|
2009-04-14 01:26:57 +00:00
|
|
|
iWrite<u8>( 0x8d );
|
2009-04-08 06:25:40 +00:00
|
|
|
ModRM( displacement_size, to.Id, ModRm_UseSib );
|
|
|
|
SibSB( src.Scale, src.Index.Id, src.Base.Id );
|
|
|
|
}
|
|
|
|
}
|
2009-04-14 01:26:57 +00:00
|
|
|
|
|
|
|
if( displacement_size != 0 )
|
2009-04-08 06:25:40 +00:00
|
|
|
{
|
2009-04-15 21:00:32 +00:00
|
|
|
if( displacement_size == 1 )
|
|
|
|
iWrite<s8>( src.Displacement );
|
|
|
|
else
|
|
|
|
iWrite<s32>( src.Displacement );
|
2009-04-08 06:25:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
// 32-bit LEA: forwards straight to EmitLeaMagic.
__emitinline void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags ) { EmitLeaMagic( to, src, preserve_flags ); }

// 16-bit LEA: writes the 0x66 operand-size prefix first, then forwards to EmitLeaMagic.
// NOTE(review): when EmitLeaMagic substitutes iMOV/iADD/iSHL for the LEA, those emitters
// may write their own 0x66 prefix for a 16-bit register -- confirm this is harmless.
__emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags )
{
	write8( 0x66 );
	EmitLeaMagic( to, src, preserve_flags );
}
|
|
|
|
|
2009-04-16 01:34:09 +00:00
|
|
|
|
2009-04-08 06:25:40 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Push / Pop Emitters
|
|
|
|
//
|
|
|
|
// Note: pushad/popad implementations are intentionally left out. The instructions are
|
|
|
|
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
// POP to a memory operand: opcode byte 0x8f, with 0 in the ModRM reg field.
__emitinline void iPOP( const ModSibBase& from )
{
	iWrite<u8>( 0x8f );

	const uint opcode_ext = 0;
	EmitSibMagic( opcode_ext, from );
}
|
|
|
|
|
2009-04-15 21:00:32 +00:00
|
|
|
// PUSH from a memory operand: opcode byte 0xff, with 6 in the ModRM reg field.
__emitinline void iPUSH( const ModSibBase& from )
{
	iWrite<u8>( 0xff );

	const uint opcode_ext = 6;
	EmitSibMagic( opcode_ext, from );
}
|
|
|
|
|
2009-04-19 02:14:50 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// BSWAP on a 32-bit register: two bytes, 0x0F followed by 0xC8 with the register id
// OR'd into the low bits.
__emitinline void iBSWAP( const iRegister32& to )
{
	write8( 0x0F );				// two-byte opcode escape
	write8( 0xC8 | to.Id );		// opcode + register
}
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// MMX / XMM Instructions
|
|
|
|
// (these will get put in their own file later)
|
|
|
|
|
2009-04-19 05:24:20 +00:00
|
|
|
// Packed full-register moves.
// NOTE(review): the template arguments look like <prefix byte, load opcode, store
// opcode> -- confirm against MovapsImplAll.
const MovapsImplAll< 0, 0x28, 0x29 >	iMOVAPS;
const MovapsImplAll< 0, 0x10, 0x11 >	iMOVUPS;
const MovapsImplAll< 0x66, 0x28, 0x29 >	iMOVAPD;
const MovapsImplAll< 0x66, 0x10, 0x11 >	iMOVUPD;

// NOTE(review): these branches look inverted relative to the macro's name.  With
// ALWAYS_USE_MOVAPS defined, iMOVDQA/iMOVDQU get the 0x6f/0x7f opcodes; without it
// they get the same arguments as iMOVAPS/iMOVUPS.  The matching 'extern' declarations
// are not visible here, so the code is left as-is; swap both places together if
// this is confirmed to be a mistake.
#ifdef ALWAYS_USE_MOVAPS
const MovapsImplAll< 0x66, 0x6f, 0x7f >	iMOVDQA;
const MovapsImplAll< 0xf3, 0x6f, 0x7f >	iMOVDQU;
#else
const MovapsImplAll< 0, 0x28, 0x29 >	iMOVDQA;
const MovapsImplAll< 0, 0x10, 0x11 >	iMOVDQU;
#endif
|
|
|
|
|
|
|
|
// High / low quadword moves, in single (no prefix) and double (0x66 prefix) flavors.
const MovhlImplAll< 0, 0x16 >		iMOVHPS;
const MovhlImplAll< 0, 0x12 >		iMOVLPS;
const MovhlImplAll< 0x66, 0x16 >	iMOVHPD;
const MovhlImplAll< 0x66, 0x12 >	iMOVLPD;
|
|
|
|
|
2009-04-19 20:14:53 +00:00
|
|
|
const PLogicImplAll<0xdb> iPAND;
|
|
|
|
const PLogicImplAll<0xdf> iPANDN;
|
|
|
|
const PLogicImplAll<0xeb> iPOR;
|
|
|
|
const PLogicImplAll<0xef> iPXOR;
|
|
|
|
|
2009-04-20 00:06:51 +00:00
|
|
|
const SSEAndNotImpl<0x55> iANDN;
|
2009-04-19 20:14:53 +00:00
|
|
|
|
|
|
|
// Compute Reciprocal Packed Single-Precision Floating-Point Values
|
2009-04-20 00:06:51 +00:00
|
|
|
const SSELogicImpl<0,0x53> iRCPPS;
|
2009-04-19 20:14:53 +00:00
|
|
|
|
|
|
|
// Compute Reciprocal of Scalar Single-Precision Floating-Point Value
|
2009-04-20 00:06:51 +00:00
|
|
|
const SSELogicImpl<0xf3,0x53> iRCPSS;
|
2009-04-19 20:14:53 +00:00
|
|
|
|
2009-04-19 02:14:50 +00:00
|
|
|
|
|
|
|
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
|
|
|
// being cleared to zero.
|
2009-04-19 16:34:29 +00:00
|
|
|
__forceinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); }
|
2009-04-19 02:14:50 +00:00
|
|
|
|
|
|
|
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
|
|
|
// being cleared to zero.
|
2009-04-20 00:06:51 +00:00
|
|
|
__forceinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); }
|
2009-04-19 02:14:50 +00:00
|
|
|
|
|
|
|
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
|
|
|
// being cleared to zero.
|
2009-04-19 16:34:29 +00:00
|
|
|
__forceinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); }
|
|
|
|
|
2009-04-20 00:06:51 +00:00
|
|
|
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
|
|
|
__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); }
|
|
|
|
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
|
|
|
__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); }
|
|
|
|
|
|
|
|
__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); }
|
|
|
|
__forceinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); }
|
2009-04-19 16:34:29 +00:00
|
|
|
__forceinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); }
|
|
|
|
__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); }
|
|
|
|
__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); }
|
2009-04-20 00:06:51 +00:00
|
|
|
|
|
|
|
// This form of iMOVQ is Intel's adeptly named 'MOVQ2DQ'
|
2009-04-19 16:34:29 +00:00
|
|
|
__forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); }
|
2009-04-20 00:06:51 +00:00
|
|
|
|
|
|
|
// This form of iMOVQ is Intel's adeptly named 'MOVDQ2Q'
|
2009-04-19 05:24:20 +00:00
|
|
|
__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from )
|
2009-04-19 02:14:50 +00:00
|
|
|
{
|
2009-04-19 05:24:20 +00:00
|
|
|
// Manual implementation of this form of MOVQ, since its parameters are unique in a way
|
|
|
|
// that breaks the template inference of writeXMMop();
|
2009-04-19 02:14:50 +00:00
|
|
|
|
2009-04-19 05:24:20 +00:00
|
|
|
SimdPrefix<u128>( 0xd6, 0xf2 );
|
|
|
|
ModRM_Direct( to.Id, from.Id );
|
2009-04-19 02:14:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
|
2009-04-19 05:24:20 +00:00
|
|
|
#define IMPLEMENT_iMOVS( ssd, prefix ) \
|
2009-04-19 16:34:29 +00:00
|
|
|
__forceinline void iMOV##ssd( const iRegisterSSE& to, const iRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \
|
|
|
|
__forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \
|
|
|
|
__forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \
|
|
|
|
__forceinline void iMOV##ssd( const void* to, const iRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \
|
|
|
|
__forceinline void iMOV##ssd( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); }
|
2009-04-19 02:14:50 +00:00
|
|
|
|
2009-04-19 05:24:20 +00:00
|
|
|
IMPLEMENT_iMOVS( SS, 0xf3 )
|
|
|
|
IMPLEMENT_iMOVS( SD, 0xf2 )
|
2009-04-19 02:14:50 +00:00
|
|
|
|
2009-04-19 05:24:20 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
2009-04-19 16:34:29 +00:00
|
|
|
// Non-temporal moves.  The MOVNTDQA instruction is load-only (memory -> register); the other non-temporal moves below are stores (register -> memory).
|
2009-04-19 05:24:20 +00:00
|
|
|
//
|
2009-04-08 06:25:40 +00:00
|
|
|
|
2009-04-19 16:34:29 +00:00
|
|
|
__forceinline void iMOVNTDQA( const iRegisterSSE& to, const void* from )
|
|
|
|
{
|
|
|
|
iWrite<u32>( 0x2A380f66 );
|
|
|
|
iWriteDisp( to.Id, from );
|
|
|
|
}
|
|
|
|
|
|
|
|
__noinline void iMOVNTDQA( const iRegisterSSE& to, const ModSibBase& from )
|
|
|
|
{
|
|
|
|
iWrite<u32>( 0x2A380f66 );
|
|
|
|
EmitSibMagic( to.Id, from );
|
|
|
|
}
|
|
|
|
|
|
|
|
__forceinline void iMOVNTDQ( void* to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); }
|
|
|
|
__noinline void iMOVNTDQA( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); }
|
|
|
|
|
|
|
|
__forceinline void iMOVNTPD( void* to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); }
|
|
|
|
__noinline void iMOVNTPD( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); }
|
|
|
|
__forceinline void iMOVNTPS( void* to, const iRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); }
|
|
|
|
__noinline void iMOVNTPS( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); }
|
|
|
|
|
|
|
|
__forceinline void iMOVNTQ( void* to, const iRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
|
|
|
|
__noinline void iMOVNTQ( const ModSibBase& to, const iRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); }
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Mov Low to High / High to Low
|
|
|
|
//
|
|
|
|
// These instructions come in xmmreg,xmmreg forms only!
|
|
|
|
//
|
|
|
|
|
|
|
|
// Register-to-register forms using opcodes 0x16 (low -> high) and 0x12 (high -> low).
__forceinline void iMOVLHPS( const iRegisterSSE& to, const iRegisterSSE& from )
{
	writeXMMop( 0x16, to, from );
}

__forceinline void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from )
{
	writeXMMop( 0x12, to, from );
}

// NOTE(review): the 0x66-prefixed variants below reuse the same opcodes with two
// register operands.  Confirm against the Intel opcode map that these reg,reg
// encodings are valid before relying on them.
__forceinline void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from )
{
	writeXMMop( 0x66, 0x16, to, from );
}

__forceinline void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from )
{
	writeXMMop( 0x66, 0x12, to, from );
}
|
|
|
|
|
|
|
|
|
2009-04-07 08:42:25 +00:00
|
|
|
}
|