Emitter: Fixed a small bug in the ModSib emitter logic which caused some forms of ESP to encode incorrectly. Implemented CALL/BSF/BSR [x86], MOVQ, MOVD, and MOVSS [mmx/xmm]. Renamed ix86_mmx.cpp and ix86_sse.cpp to ix86_legacy_mmx.cpp and ix86_legacy_sse.cpp.

iMMI.cpp: Removed mucho dead pre-SSE2 code. (old code can be grabbed from a tagged revision, in case it's useful for future reference).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1011 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-04-19 02:14:50 +00:00
parent 924869f765
commit 6f99ec45b9
22 changed files with 1090 additions and 1627 deletions

View File

@ -86,6 +86,7 @@ typedef unsigned int uint;
#define __naked __declspec(naked)
#define __unused /*unused*/
#define __noinline __declspec(noinline)
#define CALLBACK __stdcall
#else // _MSC_VER
@ -134,6 +135,7 @@ typedef union _LARGE_INTEGER
#define __unused __attribute__((unused))
#define _inline __inline__ __attribute__((unused))
#define __forceinline __attribute__((always_inline,unused))
#define __noinline __attribute__((noinline))
#define __naked // GCC lacks the naked specifier
#define CALLBACK // CALLBACK is win32-specific mess

View File

@ -2953,18 +2953,18 @@
RelativePath="..\..\x86\ix86\ix86_legacy_internal.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_mmx.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_sse.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_legacy_types.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_mmx.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_sse.cpp"
>
</File>
<File
RelativePath="..\..\x86\ix86\ix86_sse_helpers.h"
>
@ -3004,6 +3004,10 @@
RelativePath="..\..\x86\ix86\implement\incdec.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\implement\jmpcall.h"
>
</File>
<File
RelativePath="..\..\x86\ix86\implement\movs.h"
>
@ -3012,12 +3016,16 @@
RelativePath="..\..\x86\ix86\implement\test.h"
>
</File>
<Filter
Name="xmm"
>
<File
RelativePath="..\..\x86\ix86\implement\xmm\movqss.h"
>
</File>
</Filter>
</Filter>
</Filter>
<File
RelativePath=".\ClassDiagram1.cd"
>
</File>
<File
RelativePath="..\..\Common.h"
>

File diff suppressed because it is too large Load Diff

View File

@ -249,19 +249,6 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode
#define XMMINFO_READACC 0x200
#define XMMINFO_WRITEACC 0x400
#define CPU_SSE_XMMCACHE_START(xmminfo) \
{ \
int info = eeRecompileCodeXMM(xmminfo); \
#define CPU_SSE2_XMMCACHE_START(xmminfo) \
{ \
int info = eeRecompileCodeXMM(xmminfo); \
#define CPU_SSE_XMMCACHE_END \
_clearNeededXMMregs(); \
return; \
} \
#define FPURECOMPILE_CONSTCODE(fn, xmminfo) \
void rec##fn(void) \
{ \

View File

@ -2,7 +2,7 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include
noinst_LIBRARIES = libix86.a
libix86_a_SOURCES = \
ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \
ix86_legacy.cpp ix86_sse.cpp \
ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_legacy_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \
ix86_legacy.cpp ix86_legacy_sse.cpp \
ix86_internal.h ix86_legacy_instructions.h ix86_macros.h ix86_sse_helpers.h ix86.h ix86_legacy_internal.h \
ix86_instructions.h ix86_legacy_types.h ix86_types.h

View File

@ -18,7 +18,7 @@
#pragma once
// Implementations found here: BTS/BT/BTC/BTR!
// Implementations found here: BTS/BT/BTC/BTR plus BSF/BSR!
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
// These instructions are in the 'Group8' as per Intel's manual, but since they all have
@ -46,6 +46,7 @@ protected:
public:
Group8Impl() {} // For the love of GCC.
// ------------------------------------------------------------------------
static __emitinline void Emit( const iRegister<ImmType>& bitbase, const iRegister<ImmType>& bitoffset )
{
prefix16();
@ -54,6 +55,7 @@ public:
ModRM_Direct( bitoffset.Id, bitbase.Id );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( void* bitbase, const iRegister<ImmType>& bitoffset )
{
prefix16();
@ -62,6 +64,7 @@ public:
iWriteDisp( bitoffset.Id, bitbase.Id );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibBase& bitbase, const iRegister<ImmType>& bitoffset )
{
prefix16();
@ -70,6 +73,7 @@ public:
EmitSibMagic( bitoffset.Id, bitbase );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const iRegister<ImmType>& bitbase, u8 immoffset )
{
prefix16();
@ -78,6 +82,7 @@ public:
iWrite<u8>( immoffset );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibStrict<ImmType>& bitbase, u8 immoffset )
{
prefix16();
@ -115,3 +120,68 @@ public:
Group8ImplAll() {}
};
//////////////////////////////////////////////////////////////////////////////////////////
// BSF / BSR -- 16/32 operands supported only.
//
// Emitter implementation for BSF (Bit Scan Forward) and BSR (Bit Scan Reverse).
// isReverse selects BSR (opcode 0F BD) over BSF (opcode 0F BC); ImmType selects
// the operand size (u16 forms get the 0x66 operand-size prefix).
template< bool isReverse, typename ImmType >
class BitScanImpl
{
protected:
static const uint OperandSize = sizeof(ImmType);
// Emits the 0x66 operand-size override for 16-bit operand forms.
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
// Writes the common instruction prefix/escape/opcode bytes shared by all forms.
static void emitbase()
{
prefix16();
iWrite<u8>( 0x0f );
iWrite<u8>( isReverse ? 0xbd : 0xbc );
}
public:
BitScanImpl() {} // For the love of GCC.
// ------------------------------------------------------------------------
// BSF/BSR reg, reg form.
static __emitinline void Emit( const iRegister<ImmType>& to, const iRegister<ImmType>& from )
{
emitbase();
ModRM_Direct( to.Id, from.Id );
}
// ------------------------------------------------------------------------
// BSF/BSR reg, [disp32] form (direct memory address).
static __emitinline void Emit( const iRegister<ImmType>& to, const void* src )
{
emitbase();
iWriteDisp( to.Id, src );
}
// ------------------------------------------------------------------------
// BSF/BSR reg, [modRM/SIB] form (register-indirect memory address).
static __emitinline void Emit( const iRegister<ImmType>& to, const ModSibBase& sibsrc )
{
emitbase();
EmitSibMagic( to.Id, sibsrc );
}
};
// -------------------------------------------------------------------
// BSF/BSR -- 16 and 32 bit operand forms only!
//
// Bugfix: m_16 was previously typedef'd against BitScanImpl<isReverse,u32>,
// so every 16-bit overload emitted the 32-bit instruction (prefix16() never
// saw a 2-byte operand and the 0x66 operand-size prefix was omitted).  It is
// now correctly mapped to the u16 implementation, matching the u32/u16 pair
// used by JmpCallImplAll.
//
template< bool isReverse >
class BitScanImplAll
{
protected:
	typedef BitScanImpl<isReverse,u32> m_32;
	typedef BitScanImpl<isReverse,u16> m_16;

public:
	// reg,reg / reg,[disp32] / reg,[modsib] forms, in 32- and 16-bit flavors:
	__forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); }
	__forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); }
	__forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); }
	__forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); }
	__noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); }
	__noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); }

	BitScanImplAll() {}
};

View File

@ -45,6 +45,7 @@ protected:
public:
DwordShiftImpl() {} // because GCC doesn't like static classes
// ------------------------------------------------------------------------
static __emitinline void Emit( const iRegister<ImmType>& to, const iRegister<ImmType>& from )
{
prefix16();
@ -52,6 +53,7 @@ public:
ModRM_Direct( from.Id, to.Id );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const iRegister<ImmType>& to, const iRegister<ImmType>& from, u8 imm )
{
if( imm == 0 ) return;
@ -61,12 +63,14 @@ public:
write8( imm );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibBase& sibdest, const iRegister<ImmType>& from, __unused const iRegisterCL& clreg )
{
basesibform();
EmitSibMagic( from.Id, sibdest );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibBase& sibdest, const iRegister<ImmType>& from, u8 imm )
{
basesibform();
@ -74,6 +78,7 @@ public:
write8( imm );
}
// ------------------------------------------------------------------------
// dest data type is inferred from the 'from' register, so we can do void* resolution :)
static __emitinline void Emit( void* dest, const iRegister<ImmType>& from, __unused const iRegisterCL& clreg )
{
@ -81,6 +86,7 @@ public:
iWriteDisp( from.Id, dest );
}
// ------------------------------------------------------------------------
// dest data type is inferred from the 'from' register, so we can do void* resolution :)
static __emitinline void Emit( void* dest, const iRegister<ImmType>& from, u8 imm )
{

View File

@ -50,6 +50,7 @@ protected:
public:
Group2Impl() {} // For the love of GCC.
// ------------------------------------------------------------------------
static __emitinline void Emit( const iRegister<ImmType>& to )
{
prefix16();
@ -57,6 +58,7 @@ public:
ModRM_Direct( InstType, to.Id );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const iRegister<ImmType>& to, u8 imm )
{
if( imm == 0 ) return;
@ -76,6 +78,7 @@ public:
}
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibStrict<ImmType>& sibdest )
{
prefix16();
@ -83,6 +86,7 @@ public:
EmitSibMagic( InstType, sibdest );
}
// ------------------------------------------------------------------------
static __emitinline void Emit( const ModSibStrict<ImmType>& sibdest, u8 imm )
{
if( imm == 0 ) return;
@ -108,17 +112,7 @@ public:
template< G2Type InstType >
class Group2ImplAll
{
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
// creation of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
public:
// ---------- 32 Bit Interface -----------
template< typename T > __forceinline void operator()( const iRegister<T>& to, __unused const iRegisterCL& from ) const
{ Group2Impl<InstType,T>::Emit( to ); }

View File

@ -51,8 +51,9 @@ public:
static __emitinline void Emit( bool isDec, const ModSibStrict<ImmType>& dest )
{
prefix16();
write8( Is8BitOperand() ? 0xfe : 0xff );
EmitSibMagic( isDec ? 1: 0, dest );
EmitSibMagic( isDec ? 1 : 0, dest );
}
};
@ -67,10 +68,10 @@ protected:
public:
__forceinline void operator()( const iRegister32& to ) const { m_32::Emit( isDec, to ); }
__noinline void operator()( const ModSibStrict<u32>& sibdest ) const { m_32::Emit( isDec, sibdest ); }
__noinline void operator()( const ModSibStrict<u32>& sibdest ) const{ m_32::Emit( isDec, sibdest ); }
__forceinline void operator()( const iRegister16& to ) const { m_16::Emit( isDec, to ); }
__noinline void operator()( const ModSibStrict<u16>& sibdest ) const { m_16::Emit( isDec, sibdest ); }
__noinline void operator()( const ModSibStrict<u16>& sibdest ) const{ m_16::Emit( isDec, sibdest ); }
__forceinline void operator()( const iRegister8& to ) const { m_8::Emit( isDec, to ); }
__noinline void operator()( const ModSibStrict<u8>& sibdest ) const { m_8::Emit( isDec, sibdest ); }

View File

@ -0,0 +1,85 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Implementations found here: CALL and JMP! (unconditional only)
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
// Emitter implementation for the indirect (register / memory) forms of the
// unconditional JMP and CALL instructions.  Both share opcode 0xFF, with the
// ModRM reg field selecting the operation: /4 = JMP near, /2 = CALL near.
template< typename ImmType >
class JmpCallImpl
{
protected:
static const uint OperandSize = sizeof(ImmType);
// Emits the 0x66 operand-size override for 16-bit operand forms.
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
JmpCallImpl() {} // For the love of GCC.
// JMP/CALL through an absolute address held in a register.
static __emitinline void Emit( bool isJmp, const iRegister<ImmType>& absreg )
{
prefix16();
iWrite<u8>( 0xff );
ModRM_Direct( isJmp ? 4 : 2, absreg.Id );
}
// JMP/CALL through an absolute address loaded from memory [modRM/SIB form].
static __emitinline void Emit( bool isJmp, const ModSibStrict<ImmType>& src )
{
prefix16();
iWrite<u8>( 0xff );
EmitSibMagic( isJmp ? 4 : 2, src );
}
};
// ------------------------------------------------------------------------
// Public dispatcher for JMP (isJmp == true) and CALL (isJmp == false).
// Provides 16/32-bit register and memory indirect forms, plus a direct
// (relative-displacement) form for calling/jumping to a known function.
template< bool isJmp >
class JmpCallImplAll
{
protected:
typedef JmpCallImpl<u32> m_32;
typedef JmpCallImpl<u16> m_16;
public:
JmpCallImplAll() {}
__forceinline void operator()( const iRegister32& absreg ) const { m_32::Emit( isJmp, absreg ); }
__forceinline void operator()( const ModSibStrict<u32>& src ) const { m_32::Emit( isJmp, src ); }
__forceinline void operator()( const iRegister16& absreg ) const { m_16::Emit( isJmp, absreg ); }
__forceinline void operator()( const ModSibStrict<u16>& src ) const { m_16::Emit( isJmp, src ); }
// Special form for calling functions. This form automatically resolves the
// correct displacement based on the size of the instruction being generated.
template< typename T >
__forceinline void operator()( const T* func ) const
{
if( isJmp )
iJccKnownTarget( Jcc_Unconditional, (void*)func );
else
{
// calls are relative to the instruction after this one, and length is
// always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic).
// (0xE8 = CALL rel32; displacement is counted from the end of this instruction)
sptr dest = (sptr)func - ((sptr)iGetPtr() + 5);
iWrite<u8>( 0xe8 );
iWrite<u32>( dest );
}
}
};

View File

@ -18,7 +18,9 @@
#pragma once
// Header: ix86_impl_movs.h -- covers cmov and movsx/movzx.
// Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares
// with cmov many similarities).
// Note: This header is meant to be included from within the x86Emitter::Internal namespace.
//////////////////////////////////////////////////////////////////////////////////////////
@ -143,28 +145,22 @@ public:
}
};
// Inlining Notes:
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
// cretion of the compiler.
//
// ------------------------------------------------------------------------
class MovImplAll
{
public:
template< typename T>
template< typename T >
__forceinline void operator()( const iRegister<T>& to, const iRegister<T>& from ) const { MovImpl<T>::Emit( to, from ); }
template< typename T>
template< typename T >
__forceinline void operator()( const iRegister<T>& to, const void* src ) const { MovImpl<T>::Emit( to, src ); }
template< typename T>
template< typename T >
__forceinline void operator()( void* dest, const iRegister<T>& from ) const { MovImpl<T>::Emit( dest, from ); }
template< typename T>
template< typename T >
__noinline void operator()( const ModSibBase& sibdest, const iRegister<T>& from ) const { MovImpl<T>::Emit( sibdest, from ); }
template< typename T>
template< typename T >
__noinline void operator()( const iRegister<T>& to, const ModSibBase& sibsrc ) const { MovImpl<T>::Emit( to, sibsrc ); }
template< typename T>
template< typename T >
__noinline void operator()( const ModSibStrict<T>& sibdest, int imm ) const { MovImpl<T>::Emit( sibdest, imm ); }
// preserve_flags - set to true to disable optimizations which could alter the state of
@ -184,9 +180,11 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////
// CMOV !! [in all of it's disappointing lack-of glory]
// Caution! This instruction can look exciting and cool, until you realize that it cannot
// load immediate values into registers. -_-
// CMOV !! [in all of it's disappointing lack-of glory] .. and ..
// SETcc!! [more glory, less lack!]
//
// CMOV Disclaimer: Caution! This instruction can look exciting and cool, until you
// realize that it cannot load immediate values into registers. -_-
//
template< typename ImmType, int InstBaseVal >
class CMovSetImpl

View File

@ -19,7 +19,7 @@
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instruction Implementation
// TEST instruction Implementation
template< typename ImmType >
class TestImpl

View File

@ -0,0 +1,109 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
// Writes the opcode for an MMX/XMM instruction pair.  Instructions in this
// family select their XMM (SSE) form via the 0x66 operand-size override; the
// plain form (0F xx) is the MMX encoding.  The operand type's size decides
// which encoding is produced.
template< typename OperandType >
static __forceinline void preXMM( u8 opcode )
{
	const bool isXmmForm = ( sizeof( OperandType ) == 16 );

	if( isXmmForm )
		iWrite<u16>( 0x0f66 );		// 0x66 then 0x0f (little-endian word write)
	else
		iWrite<u8>( 0x0f );

	iWrite<u8>( opcode );
}
// Writes the 0F-escaped opcode for a SIMD instruction.  A prefix of zero
// selects the plain MMX-style encoding (0F xx); any nonzero value is emitted
// as a mandatory prefix byte ahead of the 0F escape (typically 0x66, 0xf2,
// or 0xf3 for the various XMM forms).
static __forceinline void SimdPrefix( u8 opcode, u8 prefix=0 )
{
	if( prefix == 0 )
	{
		// single word write: 0x0f followed by the opcode (little-endian)
		iWrite<u16>( (opcode<<8) | 0x0f );
		return;
	}

	iWrite<u16>( 0x0f00 | prefix );		// prefix byte, then the 0x0f escape
	iWrite<u8>( opcode );
}
// Emits a full SIMD instruction (prefix + 0F escape + opcode + ModRM) for the
// register-to-register form.  'to' lands in the ModRM reg field, 'from' in r/m.
template< u8 prefix, typename T, typename T2 >
static __forceinline void writeXMMop( const iRegister<T>& to, const iRegister<T2>& from, u8 opcode )
{
SimdPrefix( opcode, prefix );
ModRM_Direct( to.Id, from.Id );
}
// Register <-> [modRM/SIB memory] form.  Direction (load vs. store) is
// determined by the opcode itself; 'reg' always occupies the ModRM reg field.
template< u8 prefix, typename T >
static __noinline void writeXMMop( const iRegister<T>& reg, const ModSibBase& sib, u8 opcode )
{
SimdPrefix( opcode, prefix );
EmitSibMagic( reg.Id, sib );
}
// Register <-> [disp32 direct memory] form.
template< u8 prefix, typename T >
static __forceinline void writeXMMop( const iRegister<T>& reg, const void* data, u8 opcode )
{
SimdPrefix( opcode, prefix );
iWriteDisp( reg.Id, data );
}
// ------------------------------------------------------------------------
// MOVD has valid forms for MMX and XMM registers.
//
// Opcode 0F 6E is the load direction (simd-reg <- r/m32), 0F 7E the store
// direction (r/m32 <- simd-reg); preXMM adds the 0x66 prefix for XMM operands.
// The "ZX" suffix denotes that the destination's upper bits (32..63 for MMX,
// 32..127 for XMM) are cleared by the load forms.
// Load 32 bits from a GPR into an MMX/XMM register (upper bits zeroed).
template< typename T >
static __forceinline void iMOVDZX( const iRegisterSIMD<T>& to, const iRegister32& from )
{
preXMM<T>( 0x6e );
ModRM_Direct( to.Id, from.Id );
}
// Load 32 bits from a direct memory address (upper bits zeroed).
template< typename T>
static __forceinline void iMOVDZX( const iRegisterSIMD<T>& to, const void* src )
{
preXMM<T>( 0x6e );
iWriteDisp( to.Id, src );
}
// Load 32 bits from a [modRM/SIB] memory operand (upper bits zeroed).
template< typename T>
static __forceinline void iMOVDZX( const iRegisterSIMD<T>& to, const ModSibBase& src )
{
preXMM<T>( 0x6e );
EmitSibMagic( to.Id, src );
}
// Store the low 32 bits of an MMX/XMM register into a GPR.
template< typename T>
static __emitinline void iMOVD( const iRegister32& to, const iRegisterSIMD<T>& from )
{
preXMM<T>( 0x7e );
ModRM_Direct( from.Id, to.Id );
}
// Store the low 32 bits of an MMX/XMM register to a direct memory address.
template< typename T>
static __forceinline void iMOVD( void* dest, const iRegisterSIMD<T>& from )
{
preXMM<T>( 0x7e );
iWriteDisp( from.Id, dest );
}
// Store the low 32 bits of an MMX/XMM register to a [modRM/SIB] memory operand.
template< typename T>
static __noinline void iMOVD( const ModSibBase& dest, const iRegisterSIMD<T>& from )
{
preXMM<T>( 0x7e );
EmitSibMagic( from.Id, dest );
}

View File

@ -66,10 +66,10 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT };
namespace x86Emitter {
const x86IndexerType ptr;
const x86IndexerTypeExplicit<u32> ptr32;
const x86IndexerTypeExplicit<u16> ptr16;
const x86IndexerTypeExplicit<u8> ptr8;
const iAddressIndexerBase ptr;
const iAddressIndexer<u32> ptr32;
const iAddressIndexer<u16> ptr16;
const iAddressIndexer<u8> ptr8;
// ------------------------------------------------------------------------
@ -280,6 +280,9 @@ const Group8ImplAll<G8Type_BTR> iBTR;
const Group8ImplAll<G8Type_BTS> iBTS;
const Group8ImplAll<G8Type_BTC> iBTC;
const BitScanImplAll<false> iBSF;
const BitScanImplAll<true> iBSR;
// ------------------------------------------------------------------------
const CMovImplGeneric iCMOV;
@ -607,8 +610,6 @@ __forceinline void iSMUL( const iRegister16& to, const iRegister16& from, s16 im
__noinline void iSMUL( const iRegister16& to, const ModSibBase& src ) { iMUL16::Emit( to, src ); }
__noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) { iMUL16::Emit( to, from, imm ); }
//////////////////////////////////////////////////////////////////////////////////////////
// Push / Pop Emitters
//
@ -627,5 +628,112 @@ __emitinline void iPUSH( const ModSibBase& from )
EmitSibMagic( 6, from );
}
//////////////////////////////////////////////////////////////////////////////////////////
// BSWAP -- reverses the byte order of a 32-bit register (opcode 0F C8+rd).
__emitinline void iBSWAP( const iRegister32& to )
{
write8( 0x0F );
write8( 0xC8 | to.Id );
}
//////////////////////////////////////////////////////////////////////////////////////////
// MMX / XMM Instructions
// (these will get put in their own file later)
// MOVQ mm, mm -- opcode 0F 6F (MMX register-to-register move).
__emitinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from )
{
writeXMMop<0>( to, from, 0x6f );
}
// MOVQ mm, [modRM/SIB] -- 64-bit load from memory into an MMX register.
__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src )
{
writeXMMop<0>( to, src, 0x6f );
}
// MOVQ mm, [disp32] -- 64-bit load from a direct memory address.
__emitinline void iMOVQ( const iRegisterMMX& to, const void* src )
{
writeXMMop<0>( to, src, 0x6f );
}
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.  [encoding F3 0F 7E: MOVQ xmm, xmm/m64]
__emitinline void iMOVQZX( const iRegisterXMM& to, const iRegisterXMM& from )
{
writeXMMop<0xf3>( to, from, 0x7e );
}
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.  [64-bit load via modRM/SIB memory operand]
__noinline void iMOVQZX( const iRegisterXMM& to, const ModSibBase& src )
{
writeXMMop<0xf3>( to, src, 0x7e );
}
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
// being cleared to zero.  [64-bit load from a direct memory address]
__emitinline void iMOVQZX( const iRegisterXMM& to, const void* src )
{
writeXMMop<0xf3>( to, src, 0x7e );
}
// MOVQ [modRM/SIB], mm -- 64-bit store of an MMX register to memory (opcode 0F 7F).
__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from )
{
writeXMMop<0>( from, dest, 0x7f );
}
// MOVQ [disp32], mm -- 64-bit store of an MMX register to a direct memory address.
__forceinline void iMOVQ( void* dest, const iRegisterMMX& from )
{
writeXMMop<0>( from, dest, 0x7f );
}
// Stores the low 64 bits of an XMM register to memory [MOVQ m64, xmm].
// Bugfix: this previously emitted F3 0F 7E, which is the *load* form of MOVQ
// (MOVQ xmm, xmm/m64) and, with 'from' in the ModRM reg field, would have
// read from [dest] into 'from' instead of storing.  The store form per the
// Intel manuals is 66 0F D6 (MOVQ xmm/m64, xmm).
__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterXMM& from )
{
	writeXMMop<0x66>( from, dest, 0xd6 );
}
// Stores the low 64 bits of an XMM register to a direct memory address
// [MOVQ m64, xmm].  Bugfix: this previously emitted F3 0F 7E, which is the
// *load* form of MOVQ and would have read from memory into 'from' instead of
// storing.  The store form per the Intel manuals is 66 0F D6.
__forceinline void iMOVQ( void* dest, const iRegisterXMM& from )
{
	writeXMMop<0x66>( from, dest, 0xd6 );
}
// MOVQ2DQ -- moves an MMX register into the low 64 bits of an XMM register
// (encoding F3 0F D6; upper 64 bits of the destination are zeroed).
__forceinline void iMOVQ( const iRegisterXMM& to, const iRegisterMMX& from )
{
writeXMMop<0xf3>( to, from, 0xd6 );
}
// MOVDQ2Q -- moves the low 64 bits of an XMM register into an MMX register
// (encoding F2 0F D6).
__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterXMM& from )
{
writeXMMop<0xf2>( to, from, 0xd6 );
}
//////////////////////////////////////////////////////////////////////////////////////////
// MOVSS -- scalar single-precision moves (encodings F3 0F 10 load / F3 0F 11 store).
// The "ZX" forms load from memory and thus zero the upper 96 bits of the destination;
// the reg,reg form leaves the destination's upper bits unmodified.
__forceinline void iMOVSS( const iRegisterXMM& to, const iRegisterXMM& from )
{
// self-move is a no-op, so skip emitting it entirely.
if( to != from )
writeXMMop<0xf3>( to, from, 0x10 );
}
// Load 32 bits from a direct memory address (upper 96 bits zeroed).
__forceinline void iMOVSSZX( const iRegisterXMM& to, const void* from )
{
writeXMMop<0xf3>( to, from, 0x10 );
}
// Load 32 bits from a [modRM/SIB] memory operand (upper 96 bits zeroed).
__forceinline void iMOVSSZX( const iRegisterXMM& to, const ModSibBase& from )
{
writeXMMop<0xf3>( to, from, 0x10 );
}
// Store the low 32 bits of an XMM register to a direct memory address.
__forceinline void iMOVSS( const void* to, const iRegisterXMM& from )
{
writeXMMop<0xf3>( from, to, 0x11 );
}
// Store the low 32 bits of an XMM register to a [modRM/SIB] memory operand.
__forceinline void iMOVSS( const ModSibBase& to, const iRegisterXMM& from )
{
writeXMMop<0xf3>( from, to, 0x11 );
}
}

View File

@ -121,15 +121,30 @@ namespace x86Emitter
// fashion.
__forceinline void ModSibBase::Reduce()
{
if( Index.IsStackPointer() )
{
// esp cannot be encoded as the index, so move it to the Base, if possible.
// note: intentionally leave index assigned to esp also (generates correct
// encoding later, since ESP cannot be encoded 'alone')
jASSUME( Scale == 0 ); // esp can't have an index modifier!
jASSUME( Base.IsEmpty() ); // base must be empty or else!
Base = Index;
return;
}
// If no index reg, then load the base register into the index slot.
if( Index.IsEmpty() )
{
Index = Base;
Scale = 0;
Base = x86IndexReg::Empty;
if( !Base.IsStackPointer() ) // prevent ESP from being encoded 'alone'
Base = x86IndexReg::Empty;
return;
}
// The Scale has a series of valid forms, all shown here:
switch( Scale )
@ -167,17 +182,6 @@ namespace x86Emitter
Scale = 3;
break;
}
if( Index.IsStackPointer() )
{
// esp cannot be encoded as the index, so move it to the Base, if possible.
jASSUME( Scale == 0 );
jASSUME( Base.IsEmpty() );
Base = Index;
// noe: leave index assigned to esp also (generates correct encoding later)
//Index = x86IndexReg::Empty;
}
}
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -35,7 +35,127 @@
namespace x86Emitter
{
extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false );
// ------------------------------------------------------------------------
// Group 1 Instruction Class
extern const Internal::Group1ImplAll<Internal::G1Type_ADD> iADD;
extern const Internal::Group1ImplAll<Internal::G1Type_OR> iOR;
extern const Internal::Group1ImplAll<Internal::G1Type_ADC> iADC;
extern const Internal::Group1ImplAll<Internal::G1Type_SBB> iSBB;
extern const Internal::Group1ImplAll<Internal::G1Type_AND> iAND;
extern const Internal::Group1ImplAll<Internal::G1Type_SUB> iSUB;
extern const Internal::Group1ImplAll<Internal::G1Type_XOR> iXOR;
extern const Internal::Group1ImplAll<Internal::G1Type_CMP> iCMP;
// ------------------------------------------------------------------------
// Group 2 Instruction Class
//
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is
// zero. This is a safe optimization since any zero-value shift does not affect any
// flags.
extern const Internal::MovImplAll iMOV;
extern const Internal::TestImplAll iTEST;
extern const Internal::Group2ImplAll<Internal::G2Type_ROL> iROL;
extern const Internal::Group2ImplAll<Internal::G2Type_ROR> iROR;
extern const Internal::Group2ImplAll<Internal::G2Type_RCL> iRCL;
extern const Internal::Group2ImplAll<Internal::G2Type_RCR> iRCR;
extern const Internal::Group2ImplAll<Internal::G2Type_SHL> iSHL;
extern const Internal::Group2ImplAll<Internal::G2Type_SHR> iSHR;
extern const Internal::Group2ImplAll<Internal::G2Type_SAR> iSAR;
// ------------------------------------------------------------------------
// Group 3 Instruction Class
extern const Internal::Group3ImplAll<Internal::G3Type_NOT> iNOT;
extern const Internal::Group3ImplAll<Internal::G3Type_NEG> iNEG;
extern const Internal::Group3ImplAll<Internal::G3Type_MUL> iUMUL;
extern const Internal::Group3ImplAll<Internal::G3Type_DIV> iUDIV;
extern const Internal::Group3ImplAll<Internal::G3Type_iDIV> iSDIV;
extern const Internal::IncDecImplAll<false> iINC;
extern const Internal::IncDecImplAll<true> iDEC;
extern const Internal::MovExtendImplAll<false> iMOVZX;
extern const Internal::MovExtendImplAll<true> iMOVSX;
extern const Internal::DwordShiftImplAll<false> iSHLD;
extern const Internal::DwordShiftImplAll<true> iSHRD;
extern const Internal::Group8ImplAll<Internal::G8Type_BT> iBT;
extern const Internal::Group8ImplAll<Internal::G8Type_BTR> iBTR;
extern const Internal::Group8ImplAll<Internal::G8Type_BTS> iBTS;
extern const Internal::Group8ImplAll<Internal::G8Type_BTC> iBTC;
extern const Internal::JmpCallImplAll<true> iJMP;
extern const Internal::JmpCallImplAll<false> iCALL;
extern const Internal::BitScanImplAll<false> iBSF;
extern const Internal::BitScanImplAll<true> iBSR;
// ------------------------------------------------------------------------
extern const Internal::CMovImplGeneric iCMOV;
extern const Internal::CMovImplAll<Jcc_Above> iCMOVA;
extern const Internal::CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
extern const Internal::CMovImplAll<Jcc_Below> iCMOVB;
extern const Internal::CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
extern const Internal::CMovImplAll<Jcc_Greater> iCMOVG;
extern const Internal::CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
extern const Internal::CMovImplAll<Jcc_Less> iCMOVL;
extern const Internal::CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
extern const Internal::CMovImplAll<Jcc_Zero> iCMOVZ;
extern const Internal::CMovImplAll<Jcc_Equal> iCMOVE;
extern const Internal::CMovImplAll<Jcc_NotZero> iCMOVNZ;
extern const Internal::CMovImplAll<Jcc_NotEqual> iCMOVNE;
extern const Internal::CMovImplAll<Jcc_Overflow> iCMOVO;
extern const Internal::CMovImplAll<Jcc_NotOverflow> iCMOVNO;
extern const Internal::CMovImplAll<Jcc_Carry> iCMOVC;
extern const Internal::CMovImplAll<Jcc_NotCarry> iCMOVNC;
extern const Internal::CMovImplAll<Jcc_Signed> iCMOVS;
extern const Internal::CMovImplAll<Jcc_Unsigned> iCMOVNS;
extern const Internal::CMovImplAll<Jcc_ParityEven> iCMOVPE;
extern const Internal::CMovImplAll<Jcc_ParityOdd> iCMOVPO;
// ------------------------------------------------------------------------
extern const Internal::SetImplGeneric iSET;
extern const Internal::SetImplAll<Jcc_Above> iSETA;
extern const Internal::SetImplAll<Jcc_AboveOrEqual> iSETAE;
extern const Internal::SetImplAll<Jcc_Below> iSETB;
extern const Internal::SetImplAll<Jcc_BelowOrEqual> iSETBE;
extern const Internal::SetImplAll<Jcc_Greater> iSETG;
extern const Internal::SetImplAll<Jcc_GreaterOrEqual> iSETGE;
extern const Internal::SetImplAll<Jcc_Less> iSETL;
extern const Internal::SetImplAll<Jcc_LessOrEqual> iSETLE;
extern const Internal::SetImplAll<Jcc_Zero> iSETZ;
extern const Internal::SetImplAll<Jcc_Equal> iSETE;
extern const Internal::SetImplAll<Jcc_NotZero> iSETNZ;
extern const Internal::SetImplAll<Jcc_NotEqual> iSETNE;
extern const Internal::SetImplAll<Jcc_Overflow> iSETO;
extern const Internal::SetImplAll<Jcc_NotOverflow> iSETNO;
extern const Internal::SetImplAll<Jcc_Carry> iSETC;
extern const Internal::SetImplAll<Jcc_NotCarry> iSETNC;
extern const Internal::SetImplAll<Jcc_Signed> iSETS;
extern const Internal::SetImplAll<Jcc_Unsigned> iSETNS;
extern const Internal::SetImplAll<Jcc_ParityEven> iSETPE;
extern const Internal::SetImplAll<Jcc_ParityOdd> iSETPO;
//////////////////////////////////////////////////////////////////////////////////////////
// Miscellaneous Instructions
// These are all defined inline or in ix86.cpp.
//
extern void iBSWAP( const iRegister32& to );
// ----- Lea Instructions (Load Effective Address) -----
// Note: alternate (void*) forms of these instructions are not provided since those
@ -81,7 +201,7 @@ namespace x86Emitter
// NOP 1-byte
__forceinline void iNOP() { write8(0x90); }
//////////////////////////////////////////////////////////////////////////////////////////
// MUL / DIV instructions
@ -105,6 +225,8 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
// JMP / Jcc Instructions!
extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false );
#define DEFINE_FORWARD_JUMP( label, cond ) \
template< typename OperandType > \
class iForward##label : public iForwardJump<OperandType> \
@ -193,5 +315,40 @@ namespace x86Emitter
typedef iForwardJPE<s32> iForwardJPE32;
typedef iForwardJPO<s8> iForwardJPO8;
typedef iForwardJPO<s32> iForwardJPO32;
//////////////////////////////////////////////////////////////////////////////////////////
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
//
// Notes:
// * Some of the functions have been renamed to more clearly reflect what they actually
// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination
// since that's what they do (MOVD clears upper 32/96 bits, etc).
//
using Internal::iMOVD;
using Internal::iMOVDZX;
extern void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from );
extern void iMOVQ( const iRegisterMMX& to, const iRegisterXMM& from );
extern void iMOVQ( const iRegisterXMM& to, const iRegisterMMX& from );
extern void iMOVQ( void* dest, const iRegisterXMM& from );
extern void iMOVQ( const ModSibBase& dest, const iRegisterXMM& from );
extern void iMOVQ( void* dest, const iRegisterMMX& from );
extern void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from );
extern void iMOVQ( const iRegisterMMX& to, const void* src );
extern void iMOVQ( const iRegisterMMX& to, const ModSibBase& src );
extern void iMOVQZX( const iRegisterXMM& to, const void* src );
extern void iMOVQZX( const iRegisterXMM& to, const ModSibBase& src );
extern void iMOVQZX( const iRegisterXMM& to, const iRegisterXMM& from );
extern void iMOVSS( const iRegisterXMM& to, const iRegisterXMM& from );
extern void iMOVSS( const void* to, const iRegisterXMM& from );
extern void iMOVSS( const ModSibBase& to, const iRegisterXMM& from );
extern void iMOVSSZX( const iRegisterXMM& to, const void* from );
extern void iMOVSSZX( const iRegisterXMM& to, const ModSibBase& from );
}

View File

@ -38,19 +38,20 @@
namespace x86Emitter {
using namespace Internal;
const JmpCallImplAll<true> iJMP;
const JmpCallImplAll<false> iCALL;
// ------------------------------------------------------------------------
void iSmartJump::SetTarget()
{
jASSUME( !m_written );
if( m_written )
throw Exception::InvalidOperation( "Attempted to set SmartJump label multiple times." );
m_target = iGetPtr();
u8* target = iGetPtr();
if( m_baseptr == NULL ) return;
iSetPtr( m_baseptr );
u8* const saveme = m_baseptr + GetMaxInstructionSize();
iJccKnownTarget( m_cc, m_target, true );
iJccKnownTarget( m_cc, target, true );
// Copy recompiled data inward if the jump instruction didn't fill the
// alloted buffer (means that we optimized things to a j8!)
@ -59,17 +60,19 @@ void iSmartJump::SetTarget()
if( spacer != 0 )
{
u8* destpos = iGetPtr();
const int copylen = (sptr)m_target - (sptr)saveme;
const int copylen = (sptr)target - (sptr)saveme;
memcpy_fast( destpos, saveme, copylen );
iSetPtr( m_target - spacer );
iSetPtr( target - spacer );
}
m_written = true;
}
//////////////////////////////////////////////////////////////////////////////////////////
//
// Destructor: finalizes the jump by writing back its target (SetTarget), so the
// end of the iSmartJump object's C++ scope becomes the jump destination.
iSmartJump::~iSmartJump()
{
	SetTarget();
	m_baseptr = NULL;	// just in case (sometimes helps in debugging too)
}
// ------------------------------------------------------------------------
// Writes a jump at the current x86Ptr, which targets a pre-established target address.
@ -78,6 +81,7 @@ void iSmartJump::SetTarget()
// slideForward - used internally by iSmartJump to indicate that the jump target is going
// to slide forward in the event of an 8 bit displacement.
//
// Using this
__emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward )
{
// Calculate the potential j8 displacement first, assuming an instruction length of 2:
@ -110,9 +114,4 @@ __emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, b
}
}
// Emits a conditional jump (Jcc) to the given target address, letting
// iJccKnownTarget pick the optimal j8 or j32 encoding from the displacement.
__emitinline void iJcc( JccComparisonType comparison, void* target )
{
	iJccKnownTarget( comparison, target );
}
}

View File

@ -445,18 +445,6 @@ emitterT void NOP( void ) { iNOP(); }
// jump instructions /
////////////////////////////////////
emitterT u8* JMP( uptr to ) {
	// Legacy dispatcher: choose the short (rel8) or long (rel32) jump form
	// from the computed backward displacement.
	const uptr displacement = ( x86Ptr - (u8*)to ) - 1;
	if ( displacement <= 0x7f )
		return (u8*)JMP8( to );

	// Long form only handles 32-bit address space targets.
	assert( to <= 0xffffffff );
	return (u8*)JMP32( to );
}
/* jmp rel8 */
emitterT u8* JMP8( u8 to )
{
@ -477,17 +465,13 @@ emitterT u32* JMP32( uptr to )
/* jmp r32/r64 */
emitterT void JMPR( x86IntRegType to )
{
	// jmp r32 -- forwarded to the new-style emitter. The legacy inline
	// encoding (RexB / 0xFF / ModRM(3,4,to)) left in this body would have
	// emitted the jump twice; it is removed.
	iJMP( iRegister32(to) );
}
// jmp m32
emitterT void JMP32M( uptr to )
{
	// jmp [m32] -- forwarded to the new-style emitter; the stale 0xFF /4
	// disp32 encoding that duplicated the emission is removed.
	iJMP( ptr32[to] );
}
/* jp rel8 */
@ -736,41 +720,27 @@ emitterT u32* JNO32( u32 to )
/* call func */
emitterT void CALLFunc( uptr func )
{
	// Call to an absolute function address, forwarded to the new-style iCALL
	// (consistent with the CALL32R/CALL32M conversions). This removes the
	// hand-rolled rel32 math ("func -= x86Ptr+5") and its range assert, which
	// the (void*) form of iCALL makes unnecessary at this call site.
	iCALL( (void*)func );
}
/* call rel32 */
emitterT void CALL32( u32 to )
{
	// call rel32: opcode 0xE8 followed by a 32-bit displacement. Note that
	// 'to' is a pre-computed relative displacement, not an absolute address.
	// (A stray iCALL line referencing an out-of-scope variable 'func' was
	// removed from this body -- it would not compile.)
	write8( 0xE8 );
	write32( to );
}
/* call r32 */
emitterT void CALL32R( x86IntRegType to )
{
	// call r32 -- forwarded to the new-style emitter; the legacy 0xFF /2
	// encoding left in this body would have emitted the call twice.
	iCALL( iRegister32( to ) );
}
/* call m32 */
emitterT void CALL32M( u32 to )
{
	// call [m32] -- forwarded to the new-style emitter; the stale 0xFF /2
	// disp32 encoding that duplicated the emission is removed.
	iCALL( ptr32[to] );
}
emitterT void BSRRtoR(x86IntRegType to, x86IntRegType from)
{
	// Bit Scan Reverse (reg,reg) -- forwarded to the new-style emitter; the
	// stale 0F BD encoding left in this body would have emitted it twice.
	iBSR( iRegister32(to), iRegister32(from) );
}
emitterT void BSWAP32R( x86IntRegType to )
{
	// bswap r32 -- forwarded to the new-style emitter; the stale 0F C8+r
	// encoding that duplicated the emission is removed.
	iBSWAP( iRegister32(to) );
}

View File

@ -576,8 +576,6 @@ extern u32* JS32( u32 to );
// call func
extern void CALLFunc( uptr func); // based on CALL32
// call rel32
extern void CALL32( u32 to );
// call r32
extern void CALL32R( x86IntRegType to );
// call m32
@ -923,7 +921,6 @@ extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from );
extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from );
extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from );
extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from );
extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8
extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from );
extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 );
extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 );

View File

@ -25,20 +25,24 @@
// note: r64 = mm
//------------------------------------------------------------------
using namespace x86Emitter;
/* movq m64 to r64 */
emitterT void MOVQMtoR( x86MMXRegType to, uptr from )
{
	// movq mm,[m64] -- forwarded to the new-style emitter. The original
	// 0F 6F encoding was left in this body (uncommented) alongside the
	// iMOVQ call, which would have emitted the instruction twice; both it
	// and the commented-out duplicate are removed.
	iMOVQ( iRegisterMMX(to), (void*)from );
}
/* movq r64 to m64 */
emitterT void MOVQRtoM( uptr to, x86MMXRegType from )
{
	// movq [m64],mm -- new-style emitter; the stale 0F 7F encoding (left in
	// uncommented, causing double emission) and its commented copy are removed.
	iMOVQ( (void*)to, iRegisterMMX(from) );
}
/* pand r64 to r64 */
@ -470,69 +474,71 @@ emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from )
write32( MEMADDR(from, 4) );
}
emitterT void MOVQ64ItoR( x86MMXRegType reg, u64 i )
{
MOVQMtoR( reg, ( uptr )(x86Ptr) + 2 + 7 );
JMP8( 8 );
write64( i );
}
emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from )
{
	// movq mm,mm -- new-style emitter; stale 0F 6F reg-form encoding
	// (double emission) and its commented duplicate removed.
	iMOVQ( iRegisterMMX(to), iRegisterMMX(from) );
}
emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset )
{
	// movq mm,[r32+offset] -- new-style emitter; stale legacy encoding
	// (double emission) and its commented duplicate removed.
	iMOVQ( iRegisterMMX(to), ptr[x86IndexReg(from)+offset] );
}
emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
	// movq [r32+offset],mm -- new-style emitter; stale legacy encoding
	// (double emission) and its commented duplicate removed.
	iMOVQ( ptr[x86IndexReg(to)+offset], iRegisterMMX(from) );
}
/* movd m32 to r64 */
emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from )
{
	// movd mm,[m32] -- uses the ZX-suffixed emitter since MOVD zero-extends
	// into the full 64-bit MMX destination. Stale 0F 6E encoding (double
	// emission) and its commented duplicate removed.
	iMOVDZX( iRegisterMMX(to), (void*)from );
}
/* movd r64 to m32 */
emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from )
{
	// movd [m32],mm -- new-style emitter; stale 0F 7E encoding (double
	// emission) and its commented duplicate removed.
	iMOVD( (void*)to, iRegisterMMX(from) );
}
emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from )
{
	// movd mm,r32 (upper bits of the MMX dest are zeroed -- ZX emitter);
	// stale 0F 6E encoding (double emission) and commented duplicate removed.
	iMOVDZX( iRegisterMMX(to), iRegister32(from) );
}
emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset )
{
	// movd mm,[r32+offset] -- ZX emitter; stale legacy encoding (double
	// emission) and its commented duplicate removed.
	iMOVDZX( iRegisterMMX(to), ptr[x86IndexReg(from)+offset] );
}
emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from )
{
	// movd r32,mm -- new-style emitter; stale 0F 7E encoding (double
	// emission) and its commented duplicate removed.
	iMOVD( iRegister32(to), iRegisterMMX(from) );
}
emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset )
{
	// movd [r32+offset],mm -- new-style emitter; stale legacy encoding
	// (double emission) and its commented duplicate removed.
	iMOVD( ptr[x86IndexReg(to)+offset], iRegisterMMX(from) );
}
// untested

View File

@ -20,6 +20,8 @@
#include "ix86_legacy_internal.h"
#include "ix86_sse_helpers.h"
using namespace x86Emitter;
//////////////////////////////////////////////////////////////////////////////////////////
// AlwaysUseMovaps [const]
//
@ -303,55 +305,39 @@ emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S
emitterT void SSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); }
emitterT void SSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); }
emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
{
write8(0xf3); SSEMtoR( 0x7e0f, 0);
}
emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
write8(0xf3); SSERtoR( 0x7e0f);
}
emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
{
SSERtoM66(0xd60f);
}
emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
{
write8(0xf2);
SSERtoR( 0xd60f);
}
emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
{
write8(0xf3);
SSERtoR( 0xd60f);
}
// Legacy SSE2 MOVQ wrappers, forwarded to the new-style emitter. The ZX-suffixed
// emitters reflect that these forms zero-extend the destination register's
// upper bits (per the MOVD/MOVQ rename note in the emitter headers).
emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { iMOVQZX( iRegisterXMM(to), (void*)from ); }
emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVQZX( iRegisterXMM(to), iRegisterXMM(from) ); }
emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) { iMOVQ( (void*)to, iRegisterXMM(from) ); }
emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { iMOVQ( iRegisterMMX(to), iRegisterXMM(from) ); }
emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) { iMOVQ( iRegisterXMM(to), iRegisterMMX(from) ); }
//**********************************************************************************/
//MOVSS: Move Scalar Single-Precision FP value *
//**********************************************************************************
emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); }
// Legacy MOVSS wrappers, forwarded to the new-style emitter. ZX variants mark
// the memory-load forms, which clear the destination's upper 96 bits (per the
// MOV rename note in the emitter headers).
emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { iMOVSSZX( iRegisterXMM(to), (void*)from ); }
emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { iMOVSS( (void*)to, iRegisterXMM(from) ); }
emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVSS( iRegisterXMM(to), iRegisterXMM(from) ); }
emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { iMOVSSZX( iRegisterXMM(to), ptr[x86IndexReg(from)+offset] ); }
emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { iMOVSS( ptr[x86IndexReg(to)+offset], iRegisterXMM(from) ); }
/*emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); }
emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); }
emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } }
emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
write8(0xf3);
RexRB(0, to, from);
write16( 0x100f );
WriteRmOffsetFrom(to, from, offset);
RexRB(0, to, from);
write16( 0x100f );
WriteRmOffsetFrom(to, from, offset);
}
emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset )
{
write8(0xf3);
RexRB(0, from, to);
write16(0x110f);
WriteRmOffsetFrom(from, to, offset);
}
RexRB(0, from, to);
write16(0x110f);
WriteRmOffsetFrom(from, to, offset);
}*/
emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); }
//**********************************************************************************/
@ -1405,8 +1391,7 @@ emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
//////////////////////////////////////////////////////////////////////////////////////////
// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions)
// This header should always be included *after* ix86.h.
//
// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the
// overhead of dynarec instructions that use these, even thought the same check would
// have been done redundantly by the emitter function.

View File

@ -129,22 +129,22 @@ namespace x86Emitter
//
// This is configured to inline emitter functions appropriately for release builds, and
// disables some of the more aggressive inlines for dev builds (which can be helpful when
// debugging).
// debugging). Additionally, I've set up the inlining to be as practical and intelligent
// as possible with regard to constant propagation. Namely this involves forcing inlining
// for (void*) forms of ModRM, which (thanks to constprop) reduce to virtually no code, and
// force-disabling inlining on complicated SibSB forms [since MSVC would sometimes inline
// despite being a generally bad idea].
//
// Note: I use __forceinline directly for most single-line class members, when needed.
// There's no point in using __emitline in these cases since the debugger can't trace into
// single-line functions anyway.
// In the case of (Reg, Imm) forms, the inlining is up to the discretion of the compiler.
//
// Note: I *intentionally* use __forceinline directly for most single-line class members,
// when needed. There's no point in using __emitline in these cases since the debugger
// can't trace into single-line functions anyway.
//
#ifdef PCSX2_DEVBUILD
# define __emitinline
#else
# define __emitinline __forceinline
#endif
#ifdef _MSC_VER
# define __noinline __declspec(noinline)
#else
# define __noinline __attribute__((noinline))
#endif
// ModRM 'mod' field enumeration. Provided mostly for reference:
@ -195,6 +195,8 @@ namespace x86Emitter
}
//////////////////////////////////////////////////////////////////////////////////////////
// iRegister
// Unless templating some fancy stuff, use the friendly iRegister32/16/8 typedefs instead.
//
template< typename OperandType >
class iRegister
@ -213,6 +215,9 @@ namespace x86Emitter
// Returns true if the register is a valid accumulator: Eax, Ax, Al.
bool IsAccumulator() const { return Id == 0; }
// returns true if the register is a valid MMX or XMM register.
bool IsSIMD() const { return OperandSize == 8 || OperandSize == 16; }
bool operator==( const iRegister<OperandType>& src ) const
{
@ -230,6 +235,28 @@ namespace x86Emitter
return *this;
}
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// Thin SIMD-flavored wrapper around iRegister, used to give XMM and MMX
// registers distinct types (iRegisterXMM / iRegisterMMX typedefs below).
template< typename OperandType >
class iRegisterSIMD : public iRegister<OperandType>
{
public:
	static const iRegisterSIMD Empty;		// defined as an empty/unused value (-1)

public:
	iRegisterSIMD(): iRegister<OperandType>() {}
	iRegisterSIMD( const iRegisterSIMD& src ) : iRegister<OperandType>( src.Id ) {}
	iRegisterSIMD( const iRegister<OperandType>& src ) : iRegister<OperandType>( src ) {}
	explicit iRegisterSIMD( int regId ) : iRegister<OperandType>( regId ) {}

	iRegisterSIMD<OperandType>& operator=( const iRegisterSIMD<OperandType>& src )
	{
		// 'Id' is a member of the dependent base iRegister<OperandType>; it
		// must be qualified with this-> so GCC's two-phase template name
		// lookup can find it (unqualified 'Id' fails to compile on GCC).
		this->Id = src.Id;
		return *this;
	}
};
// ------------------------------------------------------------------------
// Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which
@ -239,9 +266,11 @@ namespace x86Emitter
// all about the the templated code in haphazard fashion. Yay.. >_<
//
typedef iRegister<u32> iRegister32;
typedef iRegister<u16> iRegister16;
typedef iRegister<u8> iRegister8;
typedef iRegisterSIMD<u128> iRegisterXMM;
typedef iRegisterSIMD<u64> iRegisterMMX;
typedef iRegister<u32> iRegister32;
typedef iRegister<u16> iRegister16;
typedef iRegister<u8> iRegister8;
class iRegisterCL : public iRegister8
{
@ -249,6 +278,14 @@ namespace x86Emitter
iRegisterCL(): iRegister8( 1 ) {}
};
extern const iRegisterXMM
xmm0, xmm1, xmm2, xmm3,
xmm4, xmm5, xmm6, xmm7;
extern const iRegisterMMX
mm0, mm1, mm2, mm3,
mm4, mm5, mm6, mm7;
extern const iRegister32
eax, ebx, ecx, edx,
esi, edi, ebp, esp;
@ -266,6 +303,7 @@ namespace x86Emitter
//////////////////////////////////////////////////////////////////////////////////////////
// Use 32 bit registers as out index register (for ModSib memory address calculations)
// Only x86IndexReg provides operators for constructing iAddressInfo types.
//
class x86IndexReg : public iRegister32
{
public:
@ -313,9 +351,9 @@ namespace x86Emitter
{
}
__forceinline explicit iAddressInfo( const x86IndexReg& base, int displacement=0 ) :
Base( base ),
Index(),
__forceinline explicit iAddressInfo( const x86IndexReg& index, int displacement=0 ) :
Base(),
Index( index ),
Factor(0),
Displacement( displacement )
{
@ -349,13 +387,6 @@ namespace x86Emitter
__forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); }
};
enum OperandSizeType
{
OpSize_8 = 1,
OpSize_16 = 2,
OpSize_32 = 4,
};
//////////////////////////////////////////////////////////////////////////////////////////
// ModSib - Internal low-level representation of the ModRM/SIB information.
//
@ -422,9 +453,9 @@ namespace x86Emitter
};
//////////////////////////////////////////////////////////////////////////////////////////
// x86IndexerType - This is a static class which provisions our ptr[] syntax.
// iAddressIndexerBase - This is a static class which provisions our ptr[] syntax.
//
struct x86IndexerType
struct iAddressIndexerBase
{
// passthrough instruction, allows ModSib to pass silently through ptr translation
// without doing anything and without compiler error.
@ -450,7 +481,7 @@ namespace x86Emitter
return ModSibBase( (uptr)src );
}
x86IndexerType() {} // applease the GCC gods
iAddressIndexerBase() {} // appease the GCC gods
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -458,7 +489,7 @@ namespace x86Emitter
// specification of the operand size for ImmToMem operations.
//
template< typename OperandType >
struct x86IndexerTypeExplicit
struct iAddressIndexer
{
static const uint OperandSize = sizeof( OperandType );
@ -486,13 +517,15 @@ namespace x86Emitter
return ModSibStrict<OperandType>( (uptr)src );
}
x86IndexerTypeExplicit() {} // GCC initialization dummy
iAddressIndexer() {} // GCC initialization dummy
};
extern const x86IndexerType ptr;
extern const x86IndexerTypeExplicit<u32> ptr32;
extern const x86IndexerTypeExplicit<u16> ptr16;
extern const x86IndexerTypeExplicit<u8> ptr8;
// ptr[] - use this form for instructions which can resolve the address operand size from
// the other register operand sizes.
extern const iAddressIndexerBase ptr;
extern const iAddressIndexer<u32> ptr32; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms
extern const iAddressIndexer<u16> ptr16; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms
extern const iAddressIndexer<u8> ptr8; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms
//////////////////////////////////////////////////////////////////////////////////////////
// JccComparisonType - enumerated possibilities for inspired code branching!
@ -533,64 +566,67 @@ namespace x86Emitter
// as per the measured displacement distance. If the displacement is a valid s8, then
// a j8 is inserted, else a j32.
//
// Performance Analysis: j8's use 4 fewer bytes per opcode, and thus can provide
// minor speed benefits in the form of L1/L2 cache clutter. They're also notably faster
// on P4's, and mildly faster on AMDs. (Core2's and i7's don't care)
// Note: This class is inherently unsafe, and so it's recommended to use iForwardJump8/32
// whenever it is known that the jump destination is (or is not) short. Only use
// iSmartJump in cases where it's unknown what jump encoding will be ideal.
//
class iSmartJump
// Important: Use this tool with caution! iSmartJump cannot be used in cases where jump
// targets overlap, since the writeback of the second target will alter the position of
// the first target (which breaks the relative addressing). To assist in avoiding such
// errors, iSmartJump works based on C++ block scope, where the destruction of the
// iSmartJump object (invoked by a '}') signals the target of the jump. Example:
//
// {
// iCMP( EAX, ECX );
// iSmartJump jumpTo( Jcc_Above );
// [... conditional code ...]
// } // smartjump targets this spot.
//
// No code inside the scope can attempt to jump outside the scoped block (unless the jump
// uses an immediate addressing method, such as Register or Mod/RM forms of JMP/CALL).
// Multiple SmartJumps can be safely nested inside scopes, as long as they are properly
// scoped themselves.
//
// Performance Analysis: j8's use 4 fewer bytes per opcode, and thus can provide minor
// speed benefits in the form of L1/L2 cache clutter, on any CPU. They're also notably
// faster on P4's, and mildly faster on AMDs. (Core2's and i7's don't care)
//
class iSmartJump : public NoncopyableObject
{
protected:
u8* m_target; // x86Ptr target address of this label
u8* m_baseptr; // base address of the instruction (passed to the instruction emitter)
JccComparisonType m_cc; // comparison type of the instruction
bool m_written; // set true when the jump is written (at which point the object becomes invalid)
public:
const int GetMaxInstructionSize() const
{
jASSUME( m_cc != Jcc_Unknown );
return ( m_cc == Jcc_Unconditional ) ? 5 : 6;
}
// Creates a backward jump label which will be passed into a Jxx instruction (or few!)
// later on, and the current x86Ptr is recorded as the target [thus making the class
// creation point the jump target].
iSmartJump()
{
m_target = iGetPtr();
m_baseptr = NULL;
m_cc = Jcc_Unknown;
m_written = false;
}
JccComparisonType GetCondition() const { return m_cc; }
virtual ~iSmartJump();
// ------------------------------------------------------------------------
// ccType - Comparison type to be written back to the jump instruction position.
//
iSmartJump( JccComparisonType ccType )
{
jASSUME( ccType != Jcc_Unknown );
m_target = NULL;
m_baseptr = iGetPtr();
m_cc = ccType;
m_written = false;
iAdvancePtr( GetMaxInstructionSize() );
}
JccComparisonType GetCondition() const
{
return m_cc;
}
u8* GetTarget() const
{
return m_target;
}
protected:
void SetTarget();
};
//////////////////////////////////////////////////////////////////////////////////////////
//
// iForwardJump
// Primary use of this class is through the various iForwardJA8/iForwardJLE32/etc. helpers
// defined later in this header. :)
//
template< typename OperandType >
class iForwardJump
{
@ -601,8 +637,13 @@ namespace x86Emitter
// relative to this address.
s8* const BasePtr;
public:
// The jump instruction is emitted at the point of object construction. The conditional
// type must be valid (Jcc_Unknown generates an assertion).
iForwardJump( JccComparisonType cctype = Jcc_Unconditional );
// Sets the jump target by writing back the current x86Ptr to the jump instruction.
// This method can be called multiple times, re-writing the jump instruction's target
// in each case. (the last call is the one that takes effect).
void SetTarget() const;
};
@ -627,116 +668,12 @@ namespace x86Emitter
#include "implement/incdec.h"
#include "implement/bittest.h"
#include "implement/test.h"
#include "implement/jmpcall.h"
#include "implement/xmm/movqss.h"
}
//////////////////////////////////////////////////////////////////////////////////////////
//
// ----- Group 1 Instruction Class -----
extern const Internal::Group1ImplAll<Internal::G1Type_ADD> iADD;
extern const Internal::Group1ImplAll<Internal::G1Type_OR> iOR;
extern const Internal::Group1ImplAll<Internal::G1Type_ADC> iADC;
extern const Internal::Group1ImplAll<Internal::G1Type_SBB> iSBB;
extern const Internal::Group1ImplAll<Internal::G1Type_AND> iAND;
extern const Internal::Group1ImplAll<Internal::G1Type_SUB> iSUB;
extern const Internal::Group1ImplAll<Internal::G1Type_XOR> iXOR;
extern const Internal::Group1ImplAll<Internal::G1Type_CMP> iCMP;
// ----- Group 2 Instruction Class -----
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is
// zero. This is a safe optimization since any zero-value shift does not affect any
// flags.
extern const Internal::MovImplAll iMOV;
extern const Internal::TestImplAll iTEST;
extern const Internal::Group2ImplAll<Internal::G2Type_ROL> iROL;
extern const Internal::Group2ImplAll<Internal::G2Type_ROR> iROR;
extern const Internal::Group2ImplAll<Internal::G2Type_RCL> iRCL;
extern const Internal::Group2ImplAll<Internal::G2Type_RCR> iRCR;
extern const Internal::Group2ImplAll<Internal::G2Type_SHL> iSHL;
extern const Internal::Group2ImplAll<Internal::G2Type_SHR> iSHR;
extern const Internal::Group2ImplAll<Internal::G2Type_SAR> iSAR;
// ----- Group 3 Instruction Class -----
extern const Internal::Group3ImplAll<Internal::G3Type_NOT> iNOT;
extern const Internal::Group3ImplAll<Internal::G3Type_NEG> iNEG;
extern const Internal::Group3ImplAll<Internal::G3Type_MUL> iUMUL;
extern const Internal::Group3ImplAll<Internal::G3Type_DIV> iUDIV;
extern const Internal::Group3ImplAll<Internal::G3Type_iDIV> iSDIV;
extern const Internal::IncDecImplAll<false> iINC;
extern const Internal::IncDecImplAll<true> iDEC;
extern const Internal::MovExtendImplAll<false> iMOVZX;
extern const Internal::MovExtendImplAll<true> iMOVSX;
extern const Internal::DwordShiftImplAll<false> iSHLD;
extern const Internal::DwordShiftImplAll<true> iSHRD;
extern const Internal::Group8ImplAll<Internal::G8Type_BT> iBT;
extern const Internal::Group8ImplAll<Internal::G8Type_BTR> iBTR;
extern const Internal::Group8ImplAll<Internal::G8Type_BTS> iBTS;
extern const Internal::Group8ImplAll<Internal::G8Type_BTC> iBTC;
// ------------------------------------------------------------------------
extern const Internal::CMovImplGeneric iCMOV;
extern const Internal::CMovImplAll<Jcc_Above> iCMOVA;
extern const Internal::CMovImplAll<Jcc_AboveOrEqual> iCMOVAE;
extern const Internal::CMovImplAll<Jcc_Below> iCMOVB;
extern const Internal::CMovImplAll<Jcc_BelowOrEqual> iCMOVBE;
extern const Internal::CMovImplAll<Jcc_Greater> iCMOVG;
extern const Internal::CMovImplAll<Jcc_GreaterOrEqual> iCMOVGE;
extern const Internal::CMovImplAll<Jcc_Less> iCMOVL;
extern const Internal::CMovImplAll<Jcc_LessOrEqual> iCMOVLE;
extern const Internal::CMovImplAll<Jcc_Zero> iCMOVZ;
extern const Internal::CMovImplAll<Jcc_Equal> iCMOVE;
extern const Internal::CMovImplAll<Jcc_NotZero> iCMOVNZ;
extern const Internal::CMovImplAll<Jcc_NotEqual> iCMOVNE;
extern const Internal::CMovImplAll<Jcc_Overflow> iCMOVO;
extern const Internal::CMovImplAll<Jcc_NotOverflow> iCMOVNO;
extern const Internal::CMovImplAll<Jcc_Carry> iCMOVC;
extern const Internal::CMovImplAll<Jcc_NotCarry> iCMOVNC;
extern const Internal::CMovImplAll<Jcc_Signed> iCMOVS;
extern const Internal::CMovImplAll<Jcc_Unsigned> iCMOVNS;
extern const Internal::CMovImplAll<Jcc_ParityEven> iCMOVPE;
extern const Internal::CMovImplAll<Jcc_ParityOdd> iCMOVPO;
// ------------------------------------------------------------------------
extern const Internal::SetImplGeneric iSET;
extern const Internal::SetImplAll<Jcc_Above> iSETA;
extern const Internal::SetImplAll<Jcc_AboveOrEqual> iSETAE;
extern const Internal::SetImplAll<Jcc_Below> iSETB;
extern const Internal::SetImplAll<Jcc_BelowOrEqual> iSETBE;
extern const Internal::SetImplAll<Jcc_Greater> iSETG;
extern const Internal::SetImplAll<Jcc_GreaterOrEqual> iSETGE;
extern const Internal::SetImplAll<Jcc_Less> iSETL;
extern const Internal::SetImplAll<Jcc_LessOrEqual> iSETLE;
extern const Internal::SetImplAll<Jcc_Zero> iSETZ;
extern const Internal::SetImplAll<Jcc_Equal> iSETE;
extern const Internal::SetImplAll<Jcc_NotZero> iSETNZ;
extern const Internal::SetImplAll<Jcc_NotEqual> iSETNE;
extern const Internal::SetImplAll<Jcc_Overflow> iSETO;
extern const Internal::SetImplAll<Jcc_NotOverflow> iSETNO;
extern const Internal::SetImplAll<Jcc_Carry> iSETC;
extern const Internal::SetImplAll<Jcc_NotCarry> iSETNC;
extern const Internal::SetImplAll<Jcc_Signed> iSETS;
extern const Internal::SetImplAll<Jcc_Unsigned> iSETNS;
extern const Internal::SetImplAll<Jcc_ParityEven> iSETPE;
extern const Internal::SetImplAll<Jcc_ParityOdd> iSETPO;
}
#include "ix86_inlines.inl"