Implemented Jmp/Jcc and MOVSX/ZX instructions, and added 'i' prefix to most things (will add 'i' to a few more soon -- I think iRegister will be nicer than 'x86Register').

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@982 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-04-15 15:45:52 +00:00
parent af792b7694
commit 74db1c8189
9 changed files with 673 additions and 647 deletions
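For a quick feel of the renamed surface area, here is a minimal sketch of the new call style, assembled only from forms that appear in this diff (srcRm stands in for a caller-supplied legacy x86IntRegType; eax, ptr, ptr16, and ptr32 are the emitter's own objects):

    using namespace x86Emitter;

    x86IndexReg src( srcRm );            // wrap a legacy register id
    iMOV( eax, ptr[src] );               // renamed: was MOV
    iADD( eax, 4 );                      // Group 1 ops now carry the 'i' prefix
    iMOVZX( eax, ptr16[src + 8] );       // new in this commit: zero-extending load
    iMOV( ptr32[src + 4], 0x00000000 );  // immediate store via the strict ptr32 form

    iForwardJZ8 skipClear;               // new in this commit: typed forward jumps
    iXOR( eax, eax );
    skipClear.SetTarget();               // backpatches the jz displacement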

View File

@@ -804,10 +804,10 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
using namespace x86Emitter;
x86IndexReg thisreg( x86reg );
if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000);
if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000);
if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000);
if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000);
if ( _X ) iMOV(ptr32[thisreg+offset], 0x00000000);
if ( _Y ) iMOV(ptr32[thisreg+offset+4], 0x00000000);
if ( _Z ) iMOV(ptr32[thisreg+offset+8], 0x00000000);
if ( _W ) iMOV(ptr32[thisreg+offset+12], 0x3f800000);
}
return;
}

View File

@@ -25,6 +25,7 @@
#include "iR5900.h"
using namespace vtlb_private;
using namespace x86Emitter;
// NOTICE: This function *destroys* EAX!!
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
@@ -33,22 +34,20 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
{
// (this is one of my test cases for the new emitter --air)
using namespace x86Emitter;
x86IndexReg src( srcRm );
x86IndexReg dest( destRm );
MOV( eax, ptr[src] );
MOV( ptr[dest], eax );
iMOV( eax, ptr[src] );
iMOV( ptr[dest], eax );
MOV( eax, ptr[src+4] );
MOV( ptr[dest+4], eax );
iMOV( eax, ptr[src+4] );
iMOV( ptr[dest+4], eax );
MOV( eax, ptr[src+8] );
MOV( ptr[dest+8], eax );
iMOV( eax, ptr[src+8] );
iMOV( ptr[dest+8], eax );
MOV( eax, ptr[src+12] );
MOV( ptr[dest+12], eax );
iMOV( eax, ptr[src+12] );
iMOV( ptr[dest+12], eax );
}
/*
@@ -166,6 +165,7 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
}
}
// ------------------------------------------------------------------------
static void _vtlb_DynGen_IndirectRead( u32 bits )
{
int szidx;
@@ -188,6 +188,7 @@ static void _vtlb_DynGen_IndirectRead( u32 bits )
CALL32R(EAX);
}
// ------------------------------------------------------------------------
// Recompiled input registers:
// ecx = source addr to read from
// edx = ptr to dest to write to
@@ -199,17 +200,18 @@ void vtlb_DynGenRead64(u32 bits)
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread = JS8(0);
iForwardJS8 _fullread;
_vtlb_DynGen_DirectRead( bits, false );
u8* cont = JMP8(0);
iForwardJump8 cont;
x86SetJ8(_fullread);
_fullread.SetTarget();
_vtlb_DynGen_IndirectRead( bits );
x86SetJ8(cont);
cont.SetTarget();
}
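// The iForwardJump objects above replace the old JS8(0)/JMP8(0) + x86SetJ8
// pairs; the pattern, sketched generically (names here are illustrative, not
// from the source):
//
//   iForwardJS8 onSlowPath;     // "js" with an as-yet-unknown displacement
//   // ... emit the fast/direct path ...
//   iForwardJump8 done;         // unconditional jmp over the slow path
//   onSlowPath.SetTarget();     // backpatch: slow/indirect path begins here
//   // ... emit the slow/indirect path ...
//   done.SetTarget();           // both paths rejoin here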
// ------------------------------------------------------------------------
// Recompiled input registers:
// ecx - source address to read from
// Returns read value in eax.
@@ -221,12 +223,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
ADD32RtoR(ECX,EAX);
u8* _fullread = JS8(0);
iForwardJS8 _fullread;
_vtlb_DynGen_DirectRead( bits, sign );
u8* cont = JMP8(0);
iForwardJump8 cont;
x86SetJ8(_fullread);
_fullread.SetTarget();
_vtlb_DynGen_IndirectRead( bits );
// perform sign extension on the result:
@@ -245,11 +247,10 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
else
MOVZX32R16toR(EAX,EAX);
}
x86SetJ8(cont);
cont.SetTarget();
}
//
// ------------------------------------------------------------------------
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.
void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
@@ -317,6 +318,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
}
}
// ------------------------------------------------------------------------
// Recompiled input registers:
// ecx - source address to read from
// Returns read value in eax.
@@ -449,6 +451,7 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
}
}
// ------------------------------------------------------------------------
static void _vtlb_DynGen_IndirectWrite( u32 bits )
{
int szidx=0;
@@ -468,24 +471,26 @@ static void _vtlb_DynGen_IndirectWrite( u32 bits )
CALL32R(EAX);
}
// ------------------------------------------------------------------------
void vtlb_DynGenWrite(u32 sz)
{
MOV32RtoR(EAX,ECX);
SHR32ItoR(EAX,VTLB_PAGE_BITS);
MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2);
ADD32RtoR(ECX,EAX);
u8* _full=JS8(0);
iForwardJS8 _full;
_vtlb_DynGen_DirectWrite( sz );
u8* cont = JMP8(0);
iForwardJump8 cont;
x86SetJ8(_full);
_full.SetTarget();
_vtlb_DynGen_IndirectWrite( sz );
x86SetJ8(cont);
cont.SetTarget();
}
// ------------------------------------------------------------------------
// Generates code for a store instruction, where the address is a known constant.
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.

View File

@@ -96,22 +96,25 @@ const x86Register8
namespace Internal
{
const Group1ImplAll<G1Type_ADD> ADD;
const Group1ImplAll<G1Type_OR> OR;
const Group1ImplAll<G1Type_ADC> ADC;
const Group1ImplAll<G1Type_SBB> SBB;
const Group1ImplAll<G1Type_AND> AND;
const Group1ImplAll<G1Type_SUB> SUB;
const Group1ImplAll<G1Type_XOR> XOR;
const Group1ImplAll<G1Type_CMP> CMP;
const Group1ImplAll<G1Type_ADD> iADD;
const Group1ImplAll<G1Type_OR> iOR;
const Group1ImplAll<G1Type_ADC> iADC;
const Group1ImplAll<G1Type_SBB> iSBB;
const Group1ImplAll<G1Type_AND> iAND;
const Group1ImplAll<G1Type_SUB> iSUB;
const Group1ImplAll<G1Type_XOR> iXOR;
const Group1ImplAll<G1Type_CMP> iCMP;
const Group2ImplAll<G2Type_ROL> ROL;
const Group2ImplAll<G2Type_ROR> ROR;
const Group2ImplAll<G2Type_RCL> RCL;
const Group2ImplAll<G2Type_RCR> RCR;
const Group2ImplAll<G2Type_SHL> SHL;
const Group2ImplAll<G2Type_SHR> SHR;
const Group2ImplAll<G2Type_SAR> SAR;
const Group2ImplAll<G2Type_ROL> iROL;
const Group2ImplAll<G2Type_ROR> iROR;
const Group2ImplAll<G2Type_RCL> iRCL;
const Group2ImplAll<G2Type_RCR> iRCR;
const Group2ImplAll<G2Type_SHL> iSHL;
const Group2ImplAll<G2Type_SHR> iSHR;
const Group2ImplAll<G2Type_SAR> iSAR;
const MovExtendImplAll<true> iMOVSX;
const MovExtendImplAll<false> iMOVZX;
// Performance note: VC++ wants to use byte/word register form for the following
// ModRM/SibSB constructors if we use iWrite<u8>, and furthermore unrolls the
@@ -149,11 +152,9 @@ namespace Internal
instruction can be encoded as ModRm alone.
static __forceinline bool NeedsSibMagic( const ModSibBase& info )
{
// If base register is ESP, then we need a SIB:
if( info.Base.IsStackPointer() ) return true;
// no registers? no sibs!
// (ModSibBase::Reduce
// (ModSibBase::Reduce always places a register in Index, and optionally leaves
// Base empty if only one register is specified)
if( info.Index.IsEmpty() ) return false;
// A scaled register needs a SIB
@@ -188,7 +189,7 @@ namespace Internal
if( info.Index.IsEmpty() )
{
ModRM( 0, regfield, ModRm_UseDisp32 );
iWrite<u32>( info.Displacement );
iWrite<s32>( info.Displacement );
return;
}
else
@@ -211,7 +212,7 @@ namespace Internal
{
ModRM( 0, regfield, ModRm_UseSib );
SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 );
iWrite<u32>( info.Displacement );
iWrite<s32>( info.Displacement );
return;
}
else
@@ -226,7 +227,7 @@ namespace Internal
if( displacement_size != 0 )
{
*(u32*)x86Ptr = info.Displacement;
*(s32*)x86Ptr = info.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
}
}
@@ -234,6 +235,44 @@ namespace Internal
using namespace Internal;
// ------------------------------------------------------------------------
// Assigns the current emitter buffer target address.
// This is provided instead of using x86Ptr directly, since we may in the future find
// a need to change the storage class system for the x86Ptr 'under the hood.'
__emitinline void iSetPtr( void* ptr )
{
x86Ptr = (u8*)ptr;
}
// ------------------------------------------------------------------------
// Retrieves the current emitter buffer target address.
// This is provided instead of using x86Ptr directly, since we may in the future find
// a need to change the storage class system for the x86Ptr 'under the hood.'
__emitinline u8* iGetPtr()
{
return x86Ptr;
}
// ------------------------------------------------------------------------
__emitinline void iAlignPtr( uint bytes )
{
// forward align
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) );
}
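// Worked example (values chosen for illustration): with x86Ptr == 0x1003 and
// bytes == 16, (0x1003 + 15) & ~15 == 0x1010, i.e. the next 16-byte boundary
// (an already-aligned pointer stays put). Note the formula assumes 'bytes' is
// a power of two.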
// ------------------------------------------------------------------------
__emitinline void iAdvancePtr( uint bytes )
{
if( IsDevBuild )
{
// common debugger courtesy: advance with INT3 as filler.
for( uint i=0; i<bytes; i++ )
iWrite<u8>( 0xcc );
}
else
x86Ptr += bytes;
}
// ------------------------------------------------------------------------
// Internal implementation of EmitSibMagic which has been custom tailored
// to optimize special forms of the Lea instructions accordingly, such
@@ -258,21 +297,33 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
if( src.Index.IsEmpty() )
{
MOV( to, src.Displacement );
iMOV( to, src.Displacement );
return;
}
else if( displacement_size == 0 )
{
MOV( to, ToReg( src.Index.Id ) );
iMOV( to, ToReg( src.Index.Id ) );
return;
}
else
{
// note: no need to do ebp+0 check since we encode all 0 displacements as
// register assignments above (via MOV)
if( !preserve_flags )
{
// encode as MOV and ADD combo. Make sure to use the immediate on the
// ADD since it can encode as an 8-bit sign-extended value.
iMOV( to, ToReg( src.Index.Id ) );
iADD( to, src.Displacement );
return;
}
else
{
// note: no need to do ebp+0 check since we encode all 0 displacements as
// register assignments above (via MOV)
iWrite<u8>( 0x8d );
ModRM( displacement_size, to.Id, src.Index.Id );
iWrite<u8>( 0x8d );
ModRM( displacement_size, to.Id, src.Index.Id );
}
}
}
else
@@ -288,8 +339,8 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
// (this does not apply to older model P4s with the broken barrel shifter,
// but we currently aren't optimizing for that target anyway).
MOV( to, ToReg( src.Index.Id ) );
SHL( to, src.Scale );
iMOV( to, ToReg( src.Index.Id ) );
iSHL( to, src.Scale );
return;
}
iWrite<u8>( 0x8d );
@@ -300,26 +351,46 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
}
else
{
if( src.Scale == 0 )
{
if( !preserve_flags )
{
if( src.Index == esp )
{
// ESP is not encodable as an index (ix86 ignores it), thus:
iMOV( to, ToReg( src.Base.Id ) ); // will do the trick!
iADD( to, src.Displacement );
return;
}
else if( src.Displacement == 0 )
{
iMOV( to, ToReg( src.Base.Id ) );
iADD( to, ToReg( src.Index.Id ) );
return;
}
}
else if( (src.Index == esp) && (src.Displacement == 0) )
{
// special case handling of ESP as Index, which is replaceable with
// a single MOV even when preserve_flags is set! :D
iMOV( to, ToReg( src.Base.Id ) );
return;
}
}
if( src.Base == ebp && displacement_size == 0 )
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
iWrite<u8>( 0x8d );
ModRM( displacement_size, to.Id, ModRm_UseSib );
SibSB( src.Scale, src.Index.Id, src.Base.Id );
/*switch( displacement_size )
{
case 0: break;
case 1: emit.write<u8>( src.Displacement ); break;
case 2: emit.write<u32>( src.Displacement ); break;
jNO_DEFAULT
}*/
}
}
if( displacement_size != 0 )
{
*(u32*)x86Ptr = src.Displacement;
*(s32*)x86Ptr = src.Displacement;
x86Ptr += (displacement_size == 1) ? 1 : 4;
}
}
@@ -350,6 +421,7 @@ protected:
static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
public:
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from )
{
if( to == from ) return; // ignore redundant MOVs.
@@ -359,6 +431,7 @@ public:
ModRM( 3, from.Id, to.Id );
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const ModSibBase& dest, const x86Register<OperandSize>& from )
{
prefix16();
@@ -378,6 +451,7 @@ public:
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& src )
{
prefix16();
@@ -397,6 +471,7 @@ public:
}
}
// ------------------------------------------------------------------------
static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
{
// Note: MOV does not have (reg16/32,imm8) forms.
@@ -406,6 +481,7 @@ public:
iWrite<ImmType>( imm );
}
// ------------------------------------------------------------------------
static __forceinline void Emit( ModSibStrict<OperandSize> dest, ImmType imm )
{
prefix16();
@@ -436,50 +512,50 @@ namespace Internal
// TODO : Turn this into a macro after it's been debugged and accuracy-approved! :D
// ---------- 32 Bit Interface -----------
__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); }
__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); }
__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }
void MOV( const x86Register32& to, u32 imm, bool preserve_flags )
void iMOV( const x86Register32& to, u32 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
XOR( to, to );
iXOR( to, to );
else
MOV32i::Emit( to, imm );
}
// ---------- 16 Bit Interface -----------
__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); }
__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); }
__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }
void MOV( const x86Register16& to, u16 imm, bool preserve_flags )
void iMOV( const x86Register16& to, u16 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
XOR( to, to );
iXOR( to, to );
else
MOV16i::Emit( to, imm );
}
// ---------- 8 Bit Interface -----------
__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); }
__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); }
__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); }
__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }
void MOV( const x86Register8& to, u8 imm, bool preserve_flags )
void iMOV( const x86Register8& to, u8 imm, bool preserve_flags )
{
if( !preserve_flags && (imm == 0) )
XOR( to, to );
iXOR( to, to );
else
MOV8i::Emit( to, imm );
}
@@ -510,7 +586,8 @@ __forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); }
__emitinline void POP( const ModSibBase& from )
{
iWrite<u8>( 0x8f ); Internal::EmitSibMagic( 0, from );
iWrite<u8>( 0x8f );
Internal::EmitSibMagic( 0, from );
}
__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); }
@@ -518,7 +595,8 @@ __forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); }
__emitinline void PUSH( const ModSibBase& from )
{
iWrite<u8>( 0xff ); Internal::EmitSibMagic( 6, from );
iWrite<u8>( 0xff );
Internal::EmitSibMagic( 6, from );
}
// pushes the EFLAGS register onto the stack

View File

@@ -1,225 +0,0 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "PrecompiledHeader.h"
#include "ix86_internal.h"
//------------------------------------------------------------------
// x86 Group 1 Instructions
//------------------------------------------------------------------
// Group 1 instructions all adhere to the same encoding scheme, and so they all
// share the same emitter which has been coded here.
//
// Group 1 Table: [column value is the Reg field of the ModRM byte]
//
// 0 1 2 3 4 5 6 7
// ADD OR ADC SBB AND SUB XOR CMP
//
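// Worked example of the table: XOR sits in column 6, so its reg,reg opcode
// byte is (1 | (6<<3)) == 0x31, and Group1( G1Type_XOR, eax, eax ) emits
// 31 C0 (ModRM: mod=3, reg=eax, rm=eax).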
namespace x86Emitter {
//////////////////////////////////////////////////////////////////////////////////////////
// x86RegConverter - this class is used internally by the emitter as a helper for
// converting 8 and 16 bit register forms into 32 bit forms. This way the end-user exposed API
// can use type-safe 8/16/32 bit register types, and the underlying code can use a single
// unified emitter to generate all function variations + prefixes and such. :)
//
class x86RegConverter : public x86Register32
{
public:
x86RegConverter( x86Register32 src ) : x86Register32( src ) {}
x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {}
x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {}
};
enum Group1InstructionType
{
G1Type_ADD=0,
G1Type_OR,
G1Type_ADC,
G1Type_SBB,
G1Type_AND,
G1Type_SUB,
G1Type_XOR,
G1Type_CMP
};
static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false )
{
write8( (bit8form ? 0 : 1) | (inst<<3) );
ModRM( 3, from.Id, to.Id );
}
static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false )
{
write8( (bit8form ? 0 : 1) | (inst<<3) );
EmitSibMagic( from, sibdest );
}
static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false )
{
write8( (bit8form ? 2 : 3) | (inst<<3) );
EmitSibMagic( to, sibsrc );
}
// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit
// instruction (AX,BX,etc).
template< typename T >
static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm )
{
bool bit8form = (sizeof(T) == 1);
if( !bit8form && is_s8( imm ) )
{
write8( 0x83 );
ModRM( 3, inst, to.Id );
write8( (s8)imm );
}
else
{
if( to == eax )
write8( (bit8form ? 4 : 5) | (inst<<3) );
else
{
write8( bit8form ? 0x80 : 0x81 );
ModRM( 3, inst, to.Id );
}
x86write<T>( imm );
}
}
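// Worked example (hand-assembled): Group1_Imm( G1Type_ADD, eax, 4 ) takes the
// is_s8 path and emits 83 C0 04; with a large immediate such as 0x1000 it
// instead hits the accumulator special case and emits 05 00 10 00 00.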
// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit
// instruction (AX,BX,etc).
template< typename T >
static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm )
{
bool bit8form = (sizeof(T) == 1);
write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) );
EmitSibMagic( inst, sibdest );
if( !bit8form && is_s8( imm ) )
write8( (s8)imm );
else
x86write<T>( imm );
}
// 16 bit instruction prefix!
static __forceinline void prefix16() { write8(0x66); }
//////////////////////////////////////////////////////////////////////////////////////////
//
#define DEFINE_GROUP1_OPCODE( cod ) \
emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \
emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \
emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \
emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \
emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \
emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
\
emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \
emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \
emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \
emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \
emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \
\
emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \
emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \
emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \
emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \
emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \
emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \
emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \
emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); }
DEFINE_GROUP1_OPCODE( ADD )
DEFINE_GROUP1_OPCODE( CMP )
DEFINE_GROUP1_OPCODE( OR )
DEFINE_GROUP1_OPCODE( ADC )
DEFINE_GROUP1_OPCODE( SBB )
DEFINE_GROUP1_OPCODE( AND )
DEFINE_GROUP1_OPCODE( SUB )
DEFINE_GROUP1_OPCODE( XOR )
} // end namespace x86Emitter
static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src )
{
return x86Emitter::x86Register32( src );
}
static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src )
{
return x86Emitter::x86Register16( src );
}
static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src )
{
return x86Emitter::x86Register8( src );
}
static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src )
{
return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
#define DEFINE_LEGACY_HELPER( cod, bits ) \
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); }
#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \
DEFINE_LEGACY_HELPER( cod, 32 ) \
DEFINE_LEGACY_HELPER( cod, 16 ) \
DEFINE_LEGACY_HELPER( cod, 8 )
DEFINE_GROUP1_OPCODE_LEGACY( ADD )
DEFINE_GROUP1_OPCODE_LEGACY( CMP )
DEFINE_GROUP1_OPCODE_LEGACY( OR )
DEFINE_GROUP1_OPCODE_LEGACY( ADC )
DEFINE_GROUP1_OPCODE_LEGACY( SBB )
DEFINE_GROUP1_OPCODE_LEGACY( AND )
DEFINE_GROUP1_OPCODE_LEGACY( SUB )
DEFINE_GROUP1_OPCODE_LEGACY( XOR )
// Special forms needed by the legacy emitter syntax:
emitterT void AND32I8toR( x86IntRegType to, s8 from )
{
x86Emitter::AND32( _reghlp32(to), from );
}
emitterT void AND32I8toM( uptr to, s8 from )
{
x86Emitter::AND32( (void*)to, from );
}

View File

@@ -234,4 +234,58 @@ namespace x86Emitter
return *this;
}
//////////////////////////////////////////////////////////////////////////////////////////
//
// ------------------------------------------------------------------------
template< typename OperandType >
iForwardJump<OperandType>::iForwardJump( JccComparisonType cctype ) :
BasePtr( (s8*)iGetPtr() +
((OperandSize == 1) ? 2 : // j8's are always 2 bytes.
((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes
)
{
jASSUME( cctype != Jcc_Unknown );
jASSUME( OperandSize == 1 || OperandSize == 4 );
if( OperandSize == 1 )
iWrite<u8>( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) );
else
{
if( cctype == Jcc_Unconditional )
iWrite<u8>( 0xe9 );
else
{
iWrite<u8>( 0x0f );
iWrite<u8>( 0x80 | cctype );
}
}
iAdvancePtr( OperandSize );
}
// ------------------------------------------------------------------------
template< typename OperandType >
void iForwardJump<OperandType>::SetTarget() const
{
jASSUME( BasePtr != NULL );
sptr displacement = (sptr)iGetPtr() - (sptr)BasePtr;
if( OperandSize == 1 )
{
if( !is_s8( displacement ) )
{
assert( false );
Console::Error( "Emitter Error: Invalid short jump displacement = 0x%x", params (int)displacement );
}
BasePtr[-1] = (s8)displacement;
}
else
{
// full displacement, no sanity checks needed :D
((s32*)BasePtr)[-1] = displacement;
}
}
}

View File

@@ -35,6 +35,9 @@
namespace x86Emitter
{
extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false );
// ----- Lea Instructions (Load Effective Address) -----
// Note: alternate (void*) forms of these instructions are not provided since those
// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
@@ -56,56 +59,153 @@ namespace x86Emitter
static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); }
// ------------------------------------------------------------------------
using Internal::ADD;
using Internal::OR;
using Internal::ADC;
using Internal::SBB;
using Internal::AND;
using Internal::SUB;
using Internal::XOR;
using Internal::CMP;
using Internal::iADD;
using Internal::iOR;
using Internal::iADC;
using Internal::iSBB;
using Internal::iAND;
using Internal::iSUB;
using Internal::iXOR;
using Internal::iCMP;
using Internal::ROL;
using Internal::ROR;
using Internal::RCL;
using Internal::RCR;
using Internal::SHL;
using Internal::SHR;
using Internal::SAR;
using Internal::iROL;
using Internal::iROR;
using Internal::iRCL;
using Internal::iRCR;
using Internal::iSHL;
using Internal::iSHR;
using Internal::iSAR;
using Internal::iMOVSX;
using Internal::iMOVZX;
//////////////////////////////////////////////////////////////////////////////////////////
// MOV instructions!
// ---------- 32 Bit Interface -----------
extern void MOV( const x86Register32& to, const x86Register32& from );
extern void MOV( const ModSibBase& sibdest, const x86Register32& from );
extern void MOV( const x86Register32& to, const ModSibBase& sibsrc );
extern void MOV( const x86Register32& to, const void* src );
extern void MOV( const void* dest, const x86Register32& from );
extern void iMOV( const x86Register32& to, const x86Register32& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register32& from );
extern void iMOV( const x86Register32& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register32& to, const void* src );
extern void iMOV( const void* dest, const x86Register32& from );
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
extern void MOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
extern void MOV( const ModSibStrict<4>& sibdest, u32 imm );
extern void iMOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm );
// ---------- 16 Bit Interface -----------
extern void MOV( const x86Register16& to, const x86Register16& from );
extern void MOV( const ModSibBase& sibdest, const x86Register16& from );
extern void MOV( const x86Register16& to, const ModSibBase& sibsrc );
extern void MOV( const x86Register16& to, const void* src );
extern void MOV( const void* dest, const x86Register16& from );
extern void iMOV( const x86Register16& to, const x86Register16& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register16& from );
extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register16& to, const void* src );
extern void iMOV( const void* dest, const x86Register16& from );
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
extern void MOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
extern void MOV( const ModSibStrict<2>& sibdest, u16 imm );
extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm );
// ---------- 8 Bit Interface -----------
extern void MOV( const x86Register8& to, const x86Register8& from );
extern void MOV( const ModSibBase& sibdest, const x86Register8& from );
extern void MOV( const x86Register8& to, const ModSibBase& sibsrc );
extern void MOV( const x86Register8& to, const void* src );
extern void MOV( const void* dest, const x86Register8& from );
extern void iMOV( const x86Register8& to, const x86Register8& from );
extern void iMOV( const ModSibBase& sibdest, const x86Register8& from );
extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc );
extern void iMOV( const x86Register8& to, const void* src );
extern void iMOV( const void* dest, const x86Register8& from );
extern void MOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
extern void MOV( const ModSibStrict<1>& sibdest, u8 imm );
extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm );
//////////////////////////////////////////////////////////////////////////////////////////
// JMP / Jcc Instructions!
#define DEFINE_FORWARD_JUMP( label, cond ) \
template< typename OperandType > \
class iForward##label : public iForwardJump<OperandType> \
{ \
public: \
iForward##label() : iForwardJump<OperandType>( cond ) {} \
};
// ------------------------------------------------------------------------
// Note: typedefs below are defined individually in order to appease Intellisense
// resolution. Including them into the class definition macro above breaks it.
typedef iForwardJump<s8> iForwardJump8;
typedef iForwardJump<s32> iForwardJump32;
DEFINE_FORWARD_JUMP( JA, Jcc_Above );
DEFINE_FORWARD_JUMP( JB, Jcc_Below );
DEFINE_FORWARD_JUMP( JAE, Jcc_AboveOrEqual );
DEFINE_FORWARD_JUMP( JBE, Jcc_BelowOrEqual );
typedef iForwardJA<s8> iForwardJA8;
typedef iForwardJA<s32> iForwardJA32;
typedef iForwardJB<s8> iForwardJB8;
typedef iForwardJB<s32> iForwardJB32;
typedef iForwardJAE<s8> iForwardJAE8;
typedef iForwardJAE<s32> iForwardJAE32;
typedef iForwardJBE<s8> iForwardJBE8;
typedef iForwardJBE<s32> iForwardJBE32;
DEFINE_FORWARD_JUMP( JG, Jcc_Greater );
DEFINE_FORWARD_JUMP( JL, Jcc_Less );
DEFINE_FORWARD_JUMP( JGE, Jcc_GreaterOrEqual );
DEFINE_FORWARD_JUMP( JLE, Jcc_LessOrEqual );
typedef iForwardJG<s8> iForwardJG8;
typedef iForwardJG<s32> iForwardJG32;
typedef iForwardJL<s8> iForwardJL8;
typedef iForwardJL<s32> iForwardJL32;
typedef iForwardJGE<s8> iForwardJGE8;
typedef iForwardJGE<s32> iForwardJGE32;
typedef iForwardJLE<s8> iForwardJLE8;
typedef iForwardJLE<s32> iForwardJLE32;
DEFINE_FORWARD_JUMP( JZ, Jcc_Zero );
DEFINE_FORWARD_JUMP( JE, Jcc_Equal );
DEFINE_FORWARD_JUMP( JNZ, Jcc_NotZero );
DEFINE_FORWARD_JUMP( JNE, Jcc_NotEqual );
typedef iForwardJZ<s8> iForwardJZ8;
typedef iForwardJZ<s32> iForwardJZ32;
typedef iForwardJE<s8> iForwardJE8;
typedef iForwardJE<s32> iForwardJE32;
typedef iForwardJNZ<s8> iForwardJNZ8;
typedef iForwardJNZ<s32> iForwardJNZ32;
typedef iForwardJNE<s8> iForwardJNE8;
typedef iForwardJNE<s32> iForwardJNE32;
DEFINE_FORWARD_JUMP( JS, Jcc_Signed );
DEFINE_FORWARD_JUMP( JNS, Jcc_Unsigned );
typedef iForwardJS<s8> iForwardJS8;
typedef iForwardJS<s32> iForwardJS32;
typedef iForwardJNS<s8> iForwardJNS8;
typedef iForwardJNS<s32> iForwardJNS32;
DEFINE_FORWARD_JUMP( JO, Jcc_Overflow );
DEFINE_FORWARD_JUMP( JNO, Jcc_NotOverflow );
typedef iForwardJO<s8> iForwardJO8;
typedef iForwardJO<s32> iForwardJO32;
typedef iForwardJNO<s8> iForwardJNO8;
typedef iForwardJNO<s32> iForwardJNO32;
DEFINE_FORWARD_JUMP( JC, Jcc_Carry );
DEFINE_FORWARD_JUMP( JNC, Jcc_NotCarry );
typedef iForwardJC<s8> iForwardJC8;
typedef iForwardJC<s32> iForwardJC32;
typedef iForwardJNC<s8> iForwardJNC8;
typedef iForwardJNC<s32> iForwardJNC32;
DEFINE_FORWARD_JUMP( JPE, Jcc_ParityEven );
DEFINE_FORWARD_JUMP( JPO, Jcc_ParityOdd );
typedef iForwardJPE<s8> iForwardJPE8;
typedef iForwardJPE<s32> iForwardJPE32;
typedef iForwardJPO<s8> iForwardJPO8;
typedef iForwardJPO<s32> iForwardJPO32;
}

View File

@@ -36,157 +36,83 @@
#include "System.h"
#include "ix86_internal.h"
namespace x86Emitter {
// Another Work-in-Progress!!
/*
emitterT void x86SetPtr( u8* ptr )
// ------------------------------------------------------------------------
void iSmartJump::SetTarget()
{
x86Ptr = ptr;
jASSUME( !m_written );
if( m_written )
throw Exception::InvalidOperation( "Attempted to set SmartJump label multiple times." );
m_target = iGetPtr();
if( m_baseptr == NULL ) return;
iSetPtr( m_baseptr );
u8* const saveme = m_baseptr + GetMaxInstructionSize();
iJccKnownTarget( m_cc, m_target, true );
// Copy recompiled data inward if the jump instruction didn't fill the
// allotted buffer (means that we optimized things to a j8!)
const int spacer = (sptr)saveme - (sptr)iGetPtr();
if( spacer != 0 )
{
u8* destpos = iGetPtr();
const int copylen = (sptr)m_target - (sptr)saveme;
memcpy_fast( destpos, saveme, copylen );
iSetPtr( m_target - spacer );
}
m_written = true;
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86Ptr Label API
//
class x86Label
// ------------------------------------------------------------------------
// Writes a jump at the current x86Ptr, which targets a pre-established target address.
// (usually a backwards jump)
//
// slideForward - used internally by iSmartJump to indicate that the jump target is going
// to slide forward in the event of an 8 bit displacement.
//
__emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward )
{
public:
class Entry
{
protected:
u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type)
u8* m_base; // base address of the instruction (passed to the instruction)
int m_cc; // comparison type of the instruction
public:
explicit Entry( int cc ) :
m_base( x86Ptr )
, m_writebackpos( writebackidx )
{
}
// Calculate the potential j8 displacement first, assuming an instruction length of 2:
sptr displacement8 = (sptr)target - ((sptr)iGetPtr() + 2);
void Commit( const u8* target ) const
{
//uptr reltarget = (uptr)m_base - (uptr)target;
//*((u32*)&m_base[m_writebackpos]) = reltarget;
jASSUME( m_emit != NULL );
jASSUME( m_base != NULL );
return m_emit( m_base, target, m_cc );
}
};
const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0;
displacement8 -= slideVal;
protected:
u8* m_target; // x86Ptr target address of this label
Entry m_writebacks[8];
int m_writeback_curpos;
public:
// creates a label list with no valid target.
// Use x86LabelList::Set() to set a target prior to class destruction.
x86Label() : m_target()
{
}
x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() )
{
}
// if the following assert fails it means we accidentally used slideForward on a backward
// jump (which is an invalid operation since there's nothing to slide forward).
if( slideForward ) jASSUME( displacement8 >= 0 );
// Performs all address writebacks on destruction.
virtual ~x86Label()
if( is_s8( displacement8 ) )
{
IssueWritebacks();
iWrite<u8>( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) );
iWrite<s8>( displacement8 );
}
else
{
// Perform a 32 bit jump instead. :(
void SetTarget() { m_address = x86Ptr; }
void SetTarget( void* addr ) { m_address = (u8*)addr; }
void Clear()
{
m_writeback_curpos = 0;
}
// Adds a jump/call instruction to this label for writebacks.
void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc )
{
jASSUME( m_writeback_curpos < MaxWritebacks );
m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) );
m_writeback_curpos++;
}
void IssueWritebacks() const
{
const std::list<Entry>::const_iterator& start = m_list_writebacks.
for( ; start!=end; start++ )
if( comparison == Jcc_Unconditional )
iWrite<u8>( 0xe9 );
else
{
Entry& current = *start;
u8* donespot = current.Commit();
// Copy the data from the m_nextinst to the current location,
// and update any additional writebacks (but what about multiple labels?!?)
iWrite<u8>( 0x0f );
iWrite<u8>( 0x80 | comparison );
}
iWrite<s32>( (sptr)target - ((sptr)iGetPtr() + 4) );
}
};
#endif
void JMP( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJMP, 0 );
}
void JLE( x86Label& dest )
__emitinline void iJcc( JccComparisonType comparison, void* target )
{
dest.AddWriteback( x86Ptr, emitJCC, 0 );
iJccKnownTarget( comparison, target );
}
void x86SetJ8( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
*j8 = (u8)jump;
}
void x86SetJ8A( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
if( ((uptr)x86Ptr&0xf) > 4 ) {
uptr newjump = jump + 16-((uptr)x86Ptr&0xf);
if( newjump <= 0x7f ) {
jump = newjump;
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
}
}
*j8 = (u8)jump;
}
emitterT void x86SetJ32( u32* j32 )
{
*j32 = ( x86Ptr - (u8*)j32 ) - 4;
}
emitterT void x86SetJ32A( u32* j32 )
{
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
x86SetJ32(j32);
}
emitterT void x86Align( int bytes )
{
// forward align
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
}
*/
}

View File

@@ -57,27 +57,29 @@ static __forceinline ModSibStrict<OperandSize> _mhlp2( x86IntRegType src1, x86In
return ModSibStrict<OperandSize>( x86IndexReg(src2), x86IndexReg(src1) );
}
//////////////////////////////////////////////////////////////////////////////////////////
//
#define DEFINE_LEGACY_HELPER( cod, bits ) \
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp<bits/8>(to), imm ); } \
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp<bits/8>(to), (void*)from ); } \
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp<bits/8>(to), imm ); } \
emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp<bits/8>(to), (void*)from ); } \
emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { i##cod( ptr##bits[to], imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, imm ); } \
emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \
emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \
{ cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
{ i##cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \
{ cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); }
{ i##cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); }
#define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \
emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp<bits/8>(to), cl ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp<bits/8>(to), imm ); } \
emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \
emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \
emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp<bits/8>(to) + offset, cl ); }
emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp<bits/8>(to), cl ); } \
emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp<bits/8>(to), imm ); } \
emitterT void cod##bits##CLtoM( uptr to ) { i##cod( ptr##bits[to], cl ); } \
emitterT void cod##bits##ItoM( uptr to, u8 imm ) { i##cod( ptr##bits[to], imm ); } \
emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, imm ); } \
emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, cl ); }
//emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \
// { cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \
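// For reference, one concrete expansion of the helper above, written out by
// hand from DEFINE_LEGACY_HELPER( ADD, 32 ):
//
//   emitterT void ADD32RtoR( x86IntRegType to, x86IntRegType from )
//   {
//       iADD( _reghlp<4>(to), _reghlp<4>(from) );
//   }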
@@ -113,32 +115,44 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR )
DEFINE_OPCODE_LEGACY( MOV )
// ------------------------------------------------------------------------
#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \
emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \
emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \
emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); }
DEFINE_LEGACY_MOVEXTEND( S, 16 )
DEFINE_LEGACY_MOVEXTEND( Z, 16 )
DEFINE_LEGACY_MOVEXTEND( S, 8 )
DEFINE_LEGACY_MOVEXTEND( Z, 8 )
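// Likewise, DEFINE_LEGACY_MOVEXTEND( S, 16 ) expands (by hand) to thunks like:
//
//   emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from )
//   {
//       iMOVSX( x86Register32( to ), x86Register16( from ) );
//   }
//
// replacing the hand-rolled MOVSX/MOVZX emitters removed further below.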
// mov r32 to [r32<<scale+from2]
emitterT void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
MOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( x86Register32(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
MOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( x86Register16(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2, int scale )
{
MOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
iMOV( x86Register8(to), ptr[(x86IndexReg(from1)<<scale) + from2] );
}
// Special forms needed by the legacy emitter syntax:
emitterT void AND32I8toR( x86IntRegType to, s8 from )
{
AND( _reghlp<4>(to), from );
iAND( _reghlp<4>(to), from );
}
emitterT void AND32I8toM( uptr to, s8 from )
{
AND( ptr8[to], from );
iAND( ptr8[to], from );
}
@@ -310,103 +324,6 @@ emitterT void NOP( void )
write8(0x90);
}
/* movsx r8 to r32 */
emitterT void MOVSX32R8toR( x86IntRegType to, x86IntRegType from )
{
RexRB(0,to,from);
write16( 0xBE0F );
ModRM( 3, to, from );
}
emitterT void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset )
{
RexRB(0,to,from);
write16( 0xBE0F );
WriteRmOffsetFrom(to,from,offset);
}
/* movsx m8 to r32 */
emitterT void MOVSX32M8toR( x86IntRegType to, u32 from )
{
RexR(0,to);
write16( 0xBE0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* movsx r16 to r32 */
emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from )
{
RexRB(0,to,from);
write16( 0xBF0F );
ModRM( 3, to, from );
}
emitterT void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset )
{
RexRB(0,to,from);
write16( 0xBF0F );
WriteRmOffsetFrom(to,from,offset);
}
/* movsx m16 to r32 */
emitterT void MOVSX32M16toR( x86IntRegType to, u32 from )
{
RexR(0,to);
write16( 0xBF0F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* movzx r8 to r32 */
emitterT void MOVZX32R8toR( x86IntRegType to, x86IntRegType from )
{
RexRB(0,to,from);
write16( 0xB60F );
ModRM( 3, to, from );
}
emitterT void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset )
{
RexRB(0,to,from);
write16( 0xB60F );
WriteRmOffsetFrom(to,from,offset);
}
/* movzx m8 to r32 */
emitterT void MOVZX32M8toR( x86IntRegType to, u32 from )
{
RexR(0,to);
write16( 0xB60F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* movzx r16 to r32 */
emitterT void MOVZX32R16toR( x86IntRegType to, x86IntRegType from )
{
RexRB(0,to,from);
write16( 0xB70F );
ModRM( 3, to, from );
}
emitterT void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset )
{
RexRB(0,to,from);
write16( 0xB70F );
WriteRmOffsetFrom(to,from,offset);
}
/* movzx m16 to r32 */
emitterT void MOVZX32M16toR( x86IntRegType to, u32 from )
{
RexR(0,to);
write16( 0xB70F );
ModRM( 0, to, DISP32 );
write32( MEMADDR(from, 4) );
}
/* cmovbe r32 to r32 */
emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from )
{

View File

@@ -18,9 +18,8 @@
#pragma once
extern void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs
extern void cpudetectInit();//this is all that needs to be called and will fill up the below structs
typedef struct CAPABILITIES CAPABILITIES;
//cpu capabilities structure
struct CAPABILITIES {
u32 hasFloatingPointUnit;
@@ -137,9 +136,9 @@ namespace x86Emitter
// single-line functions anyway.
//
#ifdef PCSX2_DEVBUILD
#define __emitinline
# define __emitinline
#else
#define __emitinline __forceinline
# define __emitinline __forceinline
#endif
#ifdef _MSC_VER
@@ -148,13 +147,18 @@ namespace x86Emitter
# define __noinline
#endif
static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field)
static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field)
class x86AddressInfo;
class ModSibBase;
extern void iSetPtr( void* ptr );
extern u8* iGetPtr();
extern void iAlignPtr( uint bytes );
extern void iAdvancePtr( uint bytes );
static __forceinline void write8( u8 val )
{
iWrite( val );
@@ -195,7 +199,7 @@ namespace x86Emitter
x86Register(): Id( -1 ) {}
explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); }
bool IsEmpty() const { return Id == -1; }
bool IsEmpty() const { return Id < 0; }
// Returns true if the register is a valid accumulator: Eax, Ax, Al.
bool IsAccumulator() const { return Id == 0; }
@@ -220,7 +224,7 @@ namespace x86Emitter
// ------------------------------------------------------------------------
// Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which
// means it finds undeclared variables when MSVC does not (Since MSVC compiles templates
// when they are actually used). In practice this sucks since it means we have to move all'
// when they are actually used). In practice this sucks since it means we have to move all
// our variable and function prototypes from a nicely/neatly unified location to being strewn
// all about the templated code in haphazard fashion. Yay.. >_<
//
@@ -476,6 +480,118 @@ namespace x86Emitter
extern const x86IndexerTypeExplicit<2> ptr16;
extern const x86IndexerTypeExplicit<1> ptr8;
//////////////////////////////////////////////////////////////////////////////////////////
// JccComparisonType - enumerated possibilities for inspired code branching!
//
enum JccComparisonType
{
Jcc_Unknown = -2,
Jcc_Unconditional = -1,
Jcc_Overflow = 0x0,
Jcc_NotOverflow = 0x1,
Jcc_Below = 0x2,
Jcc_Carry = 0x2,
Jcc_AboveOrEqual = 0x3,
Jcc_NotCarry = 0x3,
Jcc_Zero = 0x4,
Jcc_Equal = 0x4,
Jcc_NotZero = 0x5,
Jcc_NotEqual = 0x5,
Jcc_BelowOrEqual = 0x6,
Jcc_Above = 0x7,
Jcc_Signed = 0x8,
Jcc_Unsigned = 0x9,
Jcc_ParityEven = 0xa,
Jcc_ParityOdd = 0xb,
Jcc_Less = 0xc,
Jcc_GreaterOrEqual = 0xd,
Jcc_LessOrEqual = 0xe,
Jcc_Greater = 0xf,
};
// Not supported yet:
//E3 cb JECXZ rel8 Jump short if ECX register is 0.
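// Note that the enum values double as the low nibble of the Jcc opcode bytes:
// the emitter writes (0x70 | cc) for short jumps and 0F (0x80 | cc) for long
// ones, so e.g. Jcc_Zero (0x4) becomes 74 rel8 or 0F 84 rel32.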
//////////////////////////////////////////////////////////////////////////////////////////
// iSmartJump
// This class provides an interface for generating forward-based j8's or j32's "smartly"
// as per the measured displacement distance. If the displacement is a valid s8, then
// a j8 is inserted, else a j32.
//
// Performance Analysis: j8's use 4 fewer bytes per opcode, and thus can provide
// minor speed benefits in the form of reduced L1/L2 cache clutter. They're also notably faster
// on P4's, and mildly faster on AMDs. (Core2's and i7's don't care)
//
class iSmartJump
{
protected:
u8* m_target; // x86Ptr target address of this label
u8* m_baseptr; // base address of the instruction (passed to the instruction emitter)
JccComparisonType m_cc; // comparison type of the instruction
bool m_written; // set true when the jump is written (at which point the object becomes invalid)
public:
const int GetMaxInstructionSize() const
{
jASSUME( m_cc != Jcc_Unknown );
return ( m_cc == Jcc_Unconditional ) ? 5 : 6;
}
// Creates a backward jump label which will be passed into a Jxx instruction (or a few!)
// later on, and the current x86Ptr is recorded as the target [thus making the class
// creation point the jump target].
iSmartJump()
{
m_target = iGetPtr();
m_baseptr = NULL;
m_cc = Jcc_Unknown;
m_written = false;
}
// ccType - Comparison type to be written back to the jump instruction position.
//
iSmartJump( JccComparisonType ccType )
{
jASSUME( ccType != Jcc_Unknown );
m_target = NULL;
m_baseptr = iGetPtr();
m_cc = ccType;
m_written = false;
iAdvancePtr( GetMaxInstructionSize() );
}
JccComparisonType GetCondition() const
{
return m_cc;
}
u8* GetTarget() const
{
return m_target;
}
void SetTarget();
};
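// A minimal usage sketch for the forward form (variable names illustrative):
//
//   iSmartJump skipBlock( Jcc_NotZero );  // reserves the worst-case 6 bytes
//   // ... emit the code to be jumped over ...
//   skipBlock.SetTarget();                // writes a j8 if the distance fits
//                                         // in an s8, else a j32, sliding the
//                                         // emitted code back when it shrinks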
//////////////////////////////////////////////////////////////////////////////////////////
//
template< typename OperandType >
class iForwardJump
{
public:
static const uint OperandSize = sizeof( OperandType );
// pointer to base of the instruction *Following* the jump. The jump address will be
// relative to this address.
s8* const BasePtr;
public:
iForwardJump( JccComparisonType cctype = Jcc_Unconditional );
void SetTarget() const;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
namespace Internal
@@ -678,13 +794,6 @@ namespace x86Emitter
}
};
// if the immediate is zero, we can replace the instruction, or ignore it
// entirely, depending on the instruction being issued. That's what we do here.
// (returns FALSE if no optimization is performed)
// [TODO] : Work-in-progress!
//template< G1Type InstType, typename RegType >
//static __forceinline void _optimize_imm0( RegType to );
// -------------------------------------------------------------------
//
template< G1Type InstType >
@@ -789,7 +898,6 @@ namespace x86Emitter
__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }
Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds.
};
@@ -799,22 +907,85 @@ namespace x86Emitter
// importing Internal into x86Emitter, which, done at the header file level, would defeat
// the purpose!)
extern const Group1ImplAll<G1Type_ADD> ADD;
extern const Group1ImplAll<G1Type_OR> OR;
extern const Group1ImplAll<G1Type_ADC> ADC;
extern const Group1ImplAll<G1Type_SBB> SBB;
extern const Group1ImplAll<G1Type_AND> AND;
extern const Group1ImplAll<G1Type_SUB> SUB;
extern const Group1ImplAll<G1Type_XOR> XOR;
extern const Group1ImplAll<G1Type_CMP> CMP;
extern const Group1ImplAll<G1Type_ADD> iADD;
extern const Group1ImplAll<G1Type_OR> iOR;
extern const Group1ImplAll<G1Type_ADC> iADC;
extern const Group1ImplAll<G1Type_SBB> iSBB;
extern const Group1ImplAll<G1Type_AND> iAND;
extern const Group1ImplAll<G1Type_SUB> iSUB;
extern const Group1ImplAll<G1Type_XOR> iXOR;
extern const Group1ImplAll<G1Type_CMP> iCMP;
extern const Group2ImplAll<G2Type_ROL> ROL;
extern const Group2ImplAll<G2Type_ROR> ROR;
extern const Group2ImplAll<G2Type_RCL> RCL;
extern const Group2ImplAll<G2Type_RCR> RCR;
extern const Group2ImplAll<G2Type_SHL> SHL;
extern const Group2ImplAll<G2Type_SHR> SHR;
extern const Group2ImplAll<G2Type_SAR> SAR;
extern const Group2ImplAll<G2Type_ROL> iROL;
extern const Group2ImplAll<G2Type_ROR> iROR;
extern const Group2ImplAll<G2Type_RCL> iRCL;
extern const Group2ImplAll<G2Type_RCR> iRCR;
extern const Group2ImplAll<G2Type_SHL> iSHL;
extern const Group2ImplAll<G2Type_SHR> iSHR;
extern const Group2ImplAll<G2Type_SAR> iSAR;
//////////////////////////////////////////////////////////////////////////////////////////
// Mov with sign/zero extension implementations:
//
template< int DestOperandSize, int SrcOperandSize >
class MovExtendImpl
{
protected:
static bool Is8BitOperand() { return SrcOperandSize == 1; }
static void prefix16() { if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); }
static __forceinline void emit_base( bool SignExtend )
{
prefix16();
iWrite<u8>( 0x0f );
iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) );
}
public:
MovExtendImpl() {} // For the love of GCC.
static __emitinline void Emit( const x86Register<DestOperandSize>& to, const x86Register<SrcOperandSize>& from, bool SignExtend )
{
emit_base( SignExtend );
ModRM( 3, from.Id, to.Id );
}
static __emitinline void Emit( const x86Register<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend )
{
emit_base( SignExtend );
EmitSibMagic( to.Id, sibsrc );
}
};
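// The opcode math in emit_base covers all four 0F-prefixed forms:
//   movzx r32, r/m8 -> 0F B6    movzx r32, r/m16 -> 0F B7
//   movsx r32, r/m8 -> 0F BE    movsx r32, r/m16 -> 0F BF
// (bit 0 selects a 16-bit source, bit 3 selects sign extension -- matching the
// hand-coded 0xB60F..0xBF0F legacy emitters this commit deletes.)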
// ------------------------------------------------------------------------
template< bool SignExtend >
class MovExtendImplAll
{
protected:
typedef MovExtendImpl<4, 2> m_16to32;
typedef MovExtendImpl<4, 1> m_8to32;
public:
__forceinline void operator()( const x86Register32& to, const x86Register16& from ) const { m_16to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); }
__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const { m_8to32::Emit( to, from, SignExtend ); }
__noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); }
MovExtendImplAll() {} // don't ask.
};
// ------------------------------------------------------------------------
extern const MovExtendImplAll<true> iMOVSX;
extern const MovExtendImplAll<false> iMOVZX;
// if the immediate is zero, we can replace the instruction, or ignore it
// entirely, depending on the instruction being issued. That's what we do here.
// (returns FALSE if no optimization is performed)
// [TODO] : Work-in-progress!
//template< G1Type InstType, typename RegType >
//static __forceinline void _optimize_imm0( RegType to );
/*template< G1Type InstType, typename RegType >
static __forceinline void _optimize_imm0( const RegType& to )
@@ -822,26 +993,26 @@ namespace x86Emitter
switch( InstType )
{
// ADD, SUB, and OR can be ignored if the imm is zero..
case G1Type_ADD:
case G1Type_SUB:
case G1Type_OR:
return true;
case G1Type_ADD:
case G1Type_SUB:
case G1Type_OR:
return true;
// ADC and SBB can never be ignored (could have carry bits)
// XOR behavior is distinct as well [or is it the same as NEG or NOT?]
case G1Type_ADC:
case G1Type_SBB:
case G1Type_XOR:
return false;
case G1Type_ADC:
case G1Type_SBB:
case G1Type_XOR:
return false;
// replace AND with XOR (or SUB works too.. whatever!)
case G1Type_AND:
XOR( to, to );
case G1Type_AND:
iXOR( to, to );
return true;
// replace CMP with OR reg,reg:
case G1Type_CMP:
OR( to, to );
case G1Type_CMP:
iOR( to, to );
return true;
jNO_DEFAULT