Fixed a bug in the Emitter that caused the VU1 to misbehave (bad gfx, freezeups, and the like). Also: resolved some GCC/C++ compatibility troubles.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@974 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-04-14 12:37:48 +00:00
parent ac1ecd5812
commit be430d5d89
6 changed files with 363 additions and 253 deletions


@ -2937,6 +2937,10 @@
RelativePath="..\..\x86\ix86\ix86_internal.h" RelativePath="..\..\x86\ix86\ix86_internal.h"
> >
</File> </File>
<File
RelativePath="..\..\x86\ix86\ix86_jmp.cpp"
>
</File>
<File <File
RelativePath="..\..\x86\ix86\ix86_legacy.cpp" RelativePath="..\..\x86\ix86\ix86_legacy.cpp"
> >


@ -355,7 +355,7 @@ void recVUMI_IADD( VURegs *VU, int info )
		if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg);
		else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg);
		else LEA32RRtoR(fdreg, fsreg, ftreg);
-		MOVZX32R16toR(fdreg, fdreg); // neeed since don't know if fdreg's upper bits are 0
+		MOVZX32R16toR(fdreg, fdreg); // needed since don't know if fdreg's upper bits are 0
	}
}
//------------------------------------------------------------------
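As a rough illustration (names are mine, not from the source), here is a C++ model of what the emitted IADD sequence computes: the add happens in a full 32-bit x86 register via ADD or LEA, and the trailing MOVZX keeps only the low 16 bits, zero-extended, mimicking the VU's 16-bit integer registers.

#include <cstdint>

// Sketch: VU integer registers are 16 bits wide, but the recompiler adds them
// in 32-bit x86 registers. The MOVZX reg32,reg16 afterwards discards the upper
// half and zero-extends, giving 16-bit wraparound semantics.
static uint32_t vu_iadd_model( uint32_t fs, uint32_t ft )
{
	uint32_t fd = fs + ft;                  // ADD32RtoR / LEA32RRtoR: 32-bit add
	fd = static_cast<uint16_t>( fd );       // MOVZX32R16toR: zero-extend low 16 bits
	return fd;
}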
@ -802,20 +802,12 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
		// (this is one of my test cases for the new emitter --air)

		using namespace x86Emitter;
-		if ( x86reg >= 0 ) {
-			x86IndexReg thisreg( x86reg );
-			if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000 );
-			if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000 );
-			if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000 );
-			if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000);
-		}
-		else {
-			if ( _X ) MOV(ptr32[offset], 0x00000000);
-			if ( _Y ) MOV(ptr32[offset+4], 0x00000000);
-			if ( _Z ) MOV(ptr32[offset+8], 0x00000000);
-			if ( _W ) MOV(ptr32[offset+14], 0x3f800000);
-		}
+		x86IndexReg thisreg( x86reg );
+		if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000);
+		if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000);
+		if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000);
+		if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000);
	}
	return;
}
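A hedged C++ model of the store pattern in this hunk (names illustrative): each enabled field gets 0.0f, except W which gets 1.0f (0x3f800000 is the raw IEEE-754 bit pattern of 1.0f), at component offsets 0, 4, 8, and 12. Note that the removed fallback branch wrote W at offset+14, two bytes into the wrong spot.

#include <cstdint>
#include <cstring>

// Sketch of the masked store emitted above: (0, 0, 0, 1.0f) into a 4-float vector.
static void save_vf_model( uint8_t* base, uintptr_t offset,
                           bool X, bool Y, bool Z, bool W )
{
	const uint32_t zero = 0x00000000;
	const uint32_t one  = 0x3f800000;       // 1.0f as raw bits
	if( X ) std::memcpy( base + offset + 0,  &zero, 4 );
	if( Y ) std::memcpy( base + offset + 4,  &zero, 4 );
	if( Z ) std::memcpy( base + offset + 8,  &zero, 4 );
	if( W ) std::memcpy( base + offset + 12, &one,  4 );   // the removed branch used +14 here
}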


@ -62,7 +62,7 @@ __threadlocal u8 *x86Ptr;
__threadlocal u8 *j8Ptr[32];
__threadlocal u32 *j32Ptr[32];

-XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT };
+__threadlocal XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT };

namespace x86Emitter {
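A hedged sketch of what this change buys: making g_xmmtypes thread-local like x86Ptr gives each recompiling thread its own copy of the emitter state. The GCC mapping to __thread appears in the header diff further down; the MSVC spelling shown here is an assumption, not taken from this commit.

// Illustrative only; the names below are placeholders, not emitter globals.
#if defined(_MSC_VER)
#	define DEMO_THREADLOCAL __declspec(thread)   // assumption for MSVC
#else
#	define DEMO_THREADLOCAL __thread             // matches the header's GCC define
#endif

DEMO_THREADLOCAL unsigned char* demo_x86Ptr = 0;  // per-thread emitter write pointer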
@ -72,9 +72,8 @@ const x86IndexerTypeExplicit<2> ptr16;
const x86IndexerTypeExplicit<1> ptr8;

// ------------------------------------------------------------------------

-const x86Register32 x86Register32::Empty;
-const x86Register16 x86Register16::Empty;
-const x86Register8 x86Register8::Empty;
+template< int OperandSize > const x86Register<OperandSize> x86Register<OperandSize>::Empty;
const x86IndexReg x86IndexReg::Empty;

const x86Register32
@ -235,164 +234,16 @@ namespace Internal
using namespace Internal;
/*
emitterT void x86SetPtr( u8* ptr )
{
x86Ptr = ptr;
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86Ptr Label API
//
class x86Label
{
public:
class Entry
{
protected:
u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type)
u8* m_base; // base address of the instruction (passed to the instruction)
int m_cc; // comparison type of the instruction
public:
explicit Entry( int cc ) :
m_base( x86Ptr )
, m_writebackpos( writebackidx )
{
}
void Commit( const u8* target ) const
{
//uptr reltarget = (uptr)m_base - (uptr)target;
//*((u32*)&m_base[m_writebackpos]) = reltarget;
jASSUME( m_emit != NULL );
jASSUME( m_base != NULL );
return m_emit( m_base, target, m_cc );
}
};
protected:
u8* m_target; // x86Ptr target address of this label
Entry m_writebacks[8];
int m_writeback_curpos;
public:
// creates a label list with no valid target.
// Use x86LabelList::Set() to set a target prior to class destruction.
x86Label() : m_target()
{
}
x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() )
{
}
// Performs all address writebacks on destruction.
virtual ~x86Label()
{
IssueWritebacks();
}
void SetTarget() { m_address = x86Ptr; }
void SetTarget( void* addr ) { m_address = (u8*)addr; }
void Clear()
{
m_writeback_curpos = 0;
}
// Adds a jump/call instruction to this label for writebacks.
void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc )
{
jASSUME( m_writeback_curpos < MaxWritebacks );
m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) );
m_writeback_curpos++;
}
void IssueWritebacks() const
{
const std::list<Entry>::const_iterator& start = m_list_writebacks.
for( ; start!=end; start++ )
{
Entry& current = *start;
u8* donespot = current.Commit();
// Copy the data from the m_nextinst to the current location,
// and update any additional writebacks (but what about multiple labels?!?)
}
}
};
#endif
void JMP( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJMP, 0 );
}
void JLE( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJCC, 0 );
}
void x86SetJ8( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
*j8 = (u8)jump;
}
void x86SetJ8A( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
if( ((uptr)x86Ptr&0xf) > 4 ) {
uptr newjump = jump + 16-((uptr)x86Ptr&0xf);
if( newjump <= 0x7f ) {
jump = newjump;
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
}
}
*j8 = (u8)jump;
}
emitterT void x86SetJ32( u32* j32 )
{
*j32 = ( x86Ptr - (u8*)j32 ) - 4;
}
emitterT void x86SetJ32A( u32* j32 )
{
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
x86SetJ32(j32);
}
emitterT void x86Align( int bytes )
{
// forward align
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
}
*/
// ------------------------------------------------------------------------
// Internal implementation of EmitSibMagic which has been custom tailored
// to optimize special forms of the Lea instructions accordingly, such
// as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg".
//
// preserve_flags - set to true to disable use of SHL on [Index*Base] forms
// of LEA, which alters flags states.
//
template< typename ToReg >
-static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false )
+static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags )
{
	int displacement_size = (src.Displacement == 0) ? 0 :
		( ( src.IsByteSizeDisp() ) ? 1 : 2 );
@ -407,18 +258,12 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false )
	if( src.Index.IsEmpty() )
	{
-		if( is16bit )
-			MOV( to, src.Displacement );
-		else
-			MOV( to, src.Displacement );
+		MOV( to, src.Displacement );
		return;
	}
	else if( displacement_size == 0 )
	{
-		if( is16bit )
-			MOV( to, ToReg( src.Index.Id ) );
-		else
-			MOV( to, ToReg( src.Index.Id ) );
+		MOV( to, ToReg( src.Index.Id ) );
		return;
	}
	else
@ -434,11 +279,11 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false )
	{
		if( src.Base.IsEmpty() )
		{
-			if( displacement_size == 0 )
+			if( !preserve_flags && (displacement_size == 0) )
			{
				// Encode [Index*Scale] as a combination of Mov and Shl.
				// This is more efficient because of the bloated LEA format which requires
-				// a 32 bit displacement, and the compact nature of the alterntive.
+				// a 32 bit displacement, and the compact nature of the alternative.
				//
				// (this does not apply to older model P4s with the broken barrel shifter,
				// but we currently aren't optimizing for that target anyway).
@ -479,16 +324,16 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false )
	}
}

-__emitinline void LEA( x86Register32 to, const ModSibBase& src )
+__emitinline void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags )
{
-	EmitLeaMagic( to, src );
+	EmitLeaMagic( to, src, preserve_flags );
}

-__emitinline void LEA( x86Register16 to, const ModSibBase& src )
+__emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags )
{
	write8( 0x66 );
-	EmitLeaMagic( to, src );
+	EmitLeaMagic( to, src, preserve_flags );
}
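For context, a byte-level illustration (hand-assembled for reference, not emitted by this snippet) of the trade-off EmitLeaMagic makes for an [Index*Scale] operand: the MOV+SHL shortcut is smaller than LEA, which always needs a 32-bit displacement in this form, but SHL rewrites EFLAGS, so it is only taken when preserve_flags is false.

#include <cstdint>

// "lea eax, [ecx*4]" versus the shorter MOV+SHL replacement:
static const uint8_t lea_form[]     = { 0x8D, 0x04, 0x8D, 0x00, 0x00, 0x00, 0x00 };  // 7 bytes, flags untouched
static const uint8_t mov_shl_form[] = { 0x89, 0xC8,                                  // mov eax, ecx
                                        0xC1, 0xE0, 0x02 };                          // shl eax, 2  (clobbers EFLAGS)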
//////////////////////////////////////////////////////////////////////////////////////////
@ -555,15 +400,10 @@ public:
	static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
	{
		// Note: MOV does not have (reg16/32,imm8) forms.
-		if( imm == 0 )
-			XOR( to, to );
-		else
-		{
-			prefix16();
-			iWrite<u8>( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id );
-			iWrite<ImmType>( imm );
-		}
+		prefix16();
+		iWrite<u8>( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id );
+		iWrite<ImmType>( imm );
	}

	static __forceinline void Emit( ModSibStrict<OperandSize> dest, ImmType imm )
@ -603,7 +443,13 @@ __noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { M
__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); }

-void MOV( const x86Register32& to, u32 imm ) { MOV32i::Emit( to, imm ); }
+void MOV( const x86Register32& to, u32 imm, bool preserve_flags )
+{
+	if( !preserve_flags && (imm == 0) )
+		XOR( to, to );
+	else
+		MOV32i::Emit( to, imm );
+}
// ---------- 16 Bit Interface -----------
@ -614,8 +460,13 @@ __noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { M
__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); }

-void MOV( const x86Register16& to, u16 imm ) { MOV16i::Emit( to, imm ); }
+void MOV( const x86Register16& to, u16 imm, bool preserve_flags )
+{
+	if( !preserve_flags && (imm == 0) )
+		XOR( to, to );
+	else
+		MOV16i::Emit( to, imm );
+}
// ---------- 8 Bit Interface -----------
__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); }
@ -625,7 +476,13 @@ __noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MO
__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); }
__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); }

-void MOV( const x86Register8& to, u8 imm ) { MOV8i::Emit( to, imm ); }
+void MOV( const x86Register8& to, u8 imm, bool preserve_flags )
+{
+	if( !preserve_flags && (imm == 0) )
+		XOR( to, to );
+	else
+		MOV8i::Emit( to, imm );
+}
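A hedged usage sketch of the overloads just defined (it assumes the x86Emitter headers from this commit; the register constants come from the emitter's own extern declarations):

using namespace x86Emitter;

static void emit_examples()
{
	MOV( eax, 0 );         // default: may be emitted as "xor eax,eax" (31 C0) - smaller, but EFLAGS change
	MOV( eax, 0, true );   // preserve_flags=true: forces "mov eax,0" (B8 00 00 00 00), EFLAGS untouched
	MOV( eax, 5 );         // non-zero immediates always use the plain MOV encoding
}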
//////////////////////////////////////////////////////////////////////////////////////////


@ -40,8 +40,8 @@ namespace x86Emitter
	// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
	// instead.

-	extern void LEA( x86Register32 to, const ModSibBase& src );
-	extern void LEA( x86Register16 to, const ModSibBase& src );
+	extern void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags=false );
+	extern void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags=false );

	// ----- Push / Pop Instructions -----
@ -80,7 +80,9 @@ namespace x86Emitter
	extern void MOV( const x86Register32& to, const void* src );
	extern void MOV( const void* dest, const x86Register32& from );

-	extern void MOV( const x86Register32& to, u32 imm );
+	// preserve_flags - set to true to disable optimizations which could alter the state of
+	// the flags (namely replacing mov reg,0 with xor).
+	extern void MOV( const x86Register32& to, u32 imm, bool preserve_flags=false );
	extern void MOV( const ModSibStrict<4>& sibdest, u32 imm );

	// ---------- 16 Bit Interface -----------
@ -90,7 +92,9 @@ namespace x86Emitter
	extern void MOV( const x86Register16& to, const void* src );
	extern void MOV( const void* dest, const x86Register16& from );

-	extern void MOV( const x86Register16& to, u16 imm );
+	// preserve_flags - set to true to disable optimizations which could alter the state of
+	// the flags (namely replacing mov reg,0 with xor).
+	extern void MOV( const x86Register16& to, u16 imm, bool preserve_flags=false );
	extern void MOV( const ModSibStrict<2>& sibdest, u16 imm );

	// ---------- 8 Bit Interface -----------
@ -100,7 +104,7 @@ namespace x86Emitter
	extern void MOV( const x86Register8& to, const void* src );
	extern void MOV( const void* dest, const x86Register8& from );

-	extern void MOV( const x86Register8& to, u8 imm );
+	extern void MOV( const x86Register8& to, u8 imm, bool preserve_flags=false );
	extern void MOV( const ModSibStrict<1>& sibdest, u8 imm );
}

pcsx2/x86/ix86/ix86_jmp.cpp (new file, 192 lines)

@ -0,0 +1,192 @@
/* Pcsx2 - Pc Ps2 Emulator
* Copyright (C) 2002-2009 Pcsx2 Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/*
* ix86 core v0.9.0
*
* Original Authors (v0.6.2 and prior):
* linuzappz <linuzappz@pcsx.net>
* alexey silinov
* goldfinger
* zerofrog(@gmail.com)
*
* Authors of v0.9.0:
* Jake.Stine(@gmail.com)
* cottonvibes(@gmail.com)
* sudonim(1@gmail.com)
*/
#include "PrecompiledHeader.h"
#include "System.h"
#include "ix86_internal.h"
// Another Work-in-Progress!!
/*
emitterT void x86SetPtr( u8* ptr )
{
x86Ptr = ptr;
}
//////////////////////////////////////////////////////////////////////////////////////////
// x86Ptr Label API
//
class x86Label
{
public:
class Entry
{
protected:
u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type)
u8* m_base; // base address of the instruction (passed to the instruction)
int m_cc; // comparison type of the instruction
public:
explicit Entry( int cc ) :
m_base( x86Ptr )
, m_writebackpos( writebackidx )
{
}
void Commit( const u8* target ) const
{
//uptr reltarget = (uptr)m_base - (uptr)target;
//*((u32*)&m_base[m_writebackpos]) = reltarget;
jASSUME( m_emit != NULL );
jASSUME( m_base != NULL );
return m_emit( m_base, target, m_cc );
}
};
protected:
u8* m_target; // x86Ptr target address of this label
Entry m_writebacks[8];
int m_writeback_curpos;
public:
// creates a label list with no valid target.
// Use x86LabelList::Set() to set a target prior to class destruction.
x86Label() : m_target()
{
}
x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() )
{
}
// Performs all address writebacks on destruction.
virtual ~x86Label()
{
IssueWritebacks();
}
void SetTarget() { m_address = x86Ptr; }
void SetTarget( void* addr ) { m_address = (u8*)addr; }
void Clear()
{
m_writeback_curpos = 0;
}
// Adds a jump/call instruction to this label for writebacks.
void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc )
{
jASSUME( m_writeback_curpos < MaxWritebacks );
m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) );
m_writeback_curpos++;
}
void IssueWritebacks() const
{
const std::list<Entry>::const_iterator& start = m_list_writebacks.
for( ; start!=end; start++ )
{
Entry& current = *start;
u8* donespot = current.Commit();
// Copy the data from the m_nextinst to the current location,
// and update any additional writebacks (but what about multiple labels?!?)
}
}
};
#endif
void JMP( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJMP, 0 );
}
void JLE( x86Label& dest )
{
dest.AddWriteback( x86Ptr, emitJCC, 0 );
}
void x86SetJ8( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
*j8 = (u8)jump;
}
void x86SetJ8A( u8* j8 )
{
u32 jump = ( x86Ptr - j8 ) - 1;
if ( jump > 0x7f ) {
Console::Error( "j8 greater than 0x7f!!" );
assert(0);
}
if( ((uptr)x86Ptr&0xf) > 4 ) {
uptr newjump = jump + 16-((uptr)x86Ptr&0xf);
if( newjump <= 0x7f ) {
jump = newjump;
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
}
}
*j8 = (u8)jump;
}
emitterT void x86SetJ32( u32* j32 )
{
*j32 = ( x86Ptr - (u8*)j32 ) - 4;
}
emitterT void x86SetJ32A( u32* j32 )
{
while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
x86SetJ32(j32);
}
emitterT void x86Align( int bytes )
{
// forward align
x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
}
*/
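A small sketch (not part of the commit) of the forward-align formula used by the commented-out x86Align above: round a pointer up to the next multiple of a power-of-two size.

#include <cstdint>

// (p + bytes - 1) & ~(bytes - 1) rounds p up to the next multiple of 'bytes',
// which must be a power of two; e.g. align_up(0x1003, 16) == 0x1010.
static uintptr_t align_up( uintptr_t p, uintptr_t bytes )
{
	return ( p + bytes - 1 ) & ~( bytes - 1 );
}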


@ -18,20 +18,6 @@
#pragma once
// x86 opcode descriptors
#define XMMREGS 8
#define X86REGS 8
#define MMXREGS 8
enum XMMSSEType
{
XMMT_INT = 0, // integer (sse2 only)
XMMT_FPS = 1, // floating point
//XMMT_FPD = 3, // double
};
extern XMMSSEType g_xmmtypes[XMMREGS];
extern void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs

typedef struct CAPABILITIES CAPABILITIES;
@ -106,10 +92,23 @@ extern CPUINFO cpuinfo;
#	define __threadlocal __thread
#endif
// x86 opcode descriptors
#define XMMREGS 8
#define X86REGS 8
#define MMXREGS 8
enum XMMSSEType
{
XMMT_INT = 0, // integer (sse2 only)
XMMT_FPS = 1, // floating point
//XMMT_FPD = 3, // double
};
extern __threadlocal u8 *x86Ptr;
extern __threadlocal u8 *j8Ptr[32];
extern __threadlocal u32 *j32Ptr[32];
extern __threadlocal XMMSSEType g_xmmtypes[XMMREGS];

//------------------------------------------------------------------
// templated version of is_s8 is required, so that u16's get correct sign extension treatment.
@ -218,10 +217,45 @@ namespace x86Emitter
	}
};
// ------------------------------------------------------------------------
// Note: GCC parses templates ahead of time, apparently as a 'favor' to the programmer, which
// means it finds undeclared variables when MSVC does not (since MSVC compiles templates
// only when they are actually used). In practice this sucks, since it means we have to move all
// our variable and function prototypes from a nicely/neatly unified location to being strewn
// all about the templated code in haphazard fashion. Yay.. >_<
// (a short illustration follows the register declarations below)
//
typedef x86Register<4> x86Register32;
typedef x86Register<2> x86Register16;
typedef x86Register<1> x86Register8;
extern const x86Register32 eax;
extern const x86Register32 ebx;
extern const x86Register32 ecx;
extern const x86Register32 edx;
extern const x86Register32 esi;
extern const x86Register32 edi;
extern const x86Register32 ebp;
extern const x86Register32 esp;
extern const x86Register16 ax;
extern const x86Register16 bx;
extern const x86Register16 cx;
extern const x86Register16 dx;
extern const x86Register16 si;
extern const x86Register16 di;
extern const x86Register16 bp;
extern const x86Register16 sp;
extern const x86Register8 al;
extern const x86Register8 cl;
extern const x86Register8 dl;
extern const x86Register8 bl;
extern const x86Register8 ah;
extern const x86Register8 ch;
extern const x86Register8 dh;
extern const x86Register8 bh;
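The illustration referenced in the note above, with made-up identifiers: GCC performs two-phase name lookup, so a name that does not depend on a template parameter must already be declared when the template is defined, whereas MSVC of that era deferred the lookup until instantiation.

void write_byte( unsigned char b );      // must be visible *before* the template for GCC

template< int OperandSize >
void emit_prefix()
{
	if( OperandSize == 2 )
		write_byte( 0x66 );              // non-dependent call: GCC resolves it at template
		                                 // definition time, 2008-era MSVC only at instantiation
}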
//////////////////////////////////////////////////////////////////////////////////////////
// Use 32 bit registers as our index register (for ModSib memory address calculations)
// Only x86IndexReg provides operators for constructing x86AddressInfo types.
@ -400,7 +434,7 @@ namespace x86Emitter
		return ModSibBase( (uptr)src );
	}

-	x86IndexerType() {}
+	x86IndexerType() {}		// appease the GCC gods
};

//////////////////////////////////////////////////////////////////////////////////////////
@ -433,6 +467,8 @@ namespace x86Emitter
	{
		return ModSibStrict<OperandSize>( (uptr)src );
	}

	x86IndexerTypeExplicit() {}		// GCC initialization dummy
};

extern const x86IndexerType ptr;
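A hedged note on the recurring "GCC dummy" constructors (example names are made up): a const object of class type declared without an initializer is only well-formed when the class has a user-declared default constructor, which GCC enforces for globals like the extern const x86IndexerType ptr above, while MSVC of the time let it slide.

struct NoCtor  { };
struct HasCtor { HasCtor() {} };        // user-declared default constructor

//const NoCtor bad_obj;                 // GCC rejects: uninitialized const without a default ctor
const HasCtor  good_obj;                // fine everywhere - this is what the dummy ctors buy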
@ -496,6 +532,8 @@ namespace x86Emitter
public:
	static const uint OperandSize = sizeof(ImmType);

	Group1Impl() {}		// because GCC doesn't like static classes

protected:
	static bool Is8BitOperand() { return OperandSize == 1; }
	static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
@ -524,6 +562,7 @@ namespace x86Emitter
	static __emitinline void Emit( const x86Register<OperandSize>& to, ImmType imm )
	{
+		prefix16();
		if( !Is8BitOperand() && is_s8( imm ) )
		{
			iWrite<u8>( 0x83 );
@ -532,7 +571,6 @@ namespace x86Emitter
		}
		else
		{
-			prefix16();
			if( to.IsAccumulator() )
				iWrite<u8>( (Is8BitOperand() ? 4 : 5) | (InstType<<3) );
			else
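For context, a note on why prefix16() moved above the is_s8() branch in this pair of hunks: the 0x66 operand-size prefix has to precede the opcode for every 16-bit form, including the sign-extended imm8 encoding (0x83 /r ib), which the old placement skipped. The byte sequences below are hand-assembled for reference.

#include <cstdint>

static const uint8_t add_ax_1[]    = { 0x66, 0x83, 0xC0, 0x01 };        // add ax, 1    (imm8 form, still prefixed)
static const uint8_t add_ax_1000[] = { 0x66, 0x05, 0xE8, 0x03 };        // add ax, 1000 (accumulator/imm16 form)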
@ -576,6 +614,8 @@ namespace x86Emitter
public:
	static const uint OperandSize = sizeof(ImmType);

	Group2Impl() {}		// For the love of GCC.

protected:
	static bool Is8BitOperand() { return OperandSize == 1; }
	static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); }
@ -637,6 +677,13 @@ namespace x86Emitter
		}
	}
};
// if the immediate is zero, we can replace the instruction, or ignore it
// entirely, depending on the instruction being issued. That's what we do here.
// (returns FALSE if no optimization is performed)
// [TODO] : Work-in-progress!
//template< G1Type InstType, typename RegType >
//static __forceinline void _optimize_imm0( RegType to );
// -------------------------------------------------------------------
//
@ -656,7 +703,7 @@ namespace x86Emitter
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
-// cretion of the compiler.
+// creation of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
@ -670,7 +717,11 @@ namespace x86Emitter
	__noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); }
	__noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); }

-	void operator()( const x86Register32& to, u32 imm ) const { m_32i::Emit( to, imm ); }
+	void operator()( const x86Register32& to, u32 imm, bool needs_flags=false ) const
+	{
+		//if( needs_flags || (imm != 0) || !_optimize_imm0() )
+		m_32i::Emit( to, imm );
+	}
	// ---------- 16 Bit Interface -----------
	__forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); }
@ -680,7 +731,7 @@ namespace x86Emitter
	__noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); }
	__noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); }

-	void operator()( const x86Register16& to, u16 imm ) const { m_16i::Emit( to, imm ); }
+	void operator()( const x86Register16& to, u16 imm, bool needs_flags=false ) const { m_16i::Emit( to, imm ); }

	// ---------- 8 Bit Interface -----------
	__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); }
@ -690,7 +741,9 @@ namespace x86Emitter
	__noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); }
	__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }

-	void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }
+	void operator()( const x86Register8& to, u8 imm, bool needs_flags=false ) const { m_8i::Emit( to, imm ); }

	Group1ImplAll() {}		// Why does GCC need these?
};
@ -712,7 +765,7 @@ namespace x86Emitter
// I've set up the inlining to be as practical and intelligent as possible, which means
// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to
// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis-
-// cretion of the compiler.
+// creation of the compiler.
//
// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
@ -735,6 +788,9 @@ namespace x86Emitter
	__noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const { m_8::Emit( sibdest, from ); }
	__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
	void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }

	Group2ImplAll() {}		// I am a class with no members, so I need an explicit constructor! Sense abounds.
};

// Define the externals for Group1/2 instructions here (inside the Internal namespace).
@ -759,36 +815,41 @@ namespace x86Emitter
	extern const Group2ImplAll<G2Type_SHL> SHL;
	extern const Group2ImplAll<G2Type_SHR> SHR;
	extern const Group2ImplAll<G2Type_SAR> SAR;
/*template< G1Type InstType, typename RegType >
static __forceinline void _optimize_imm0( const RegType& to )
{
switch( InstType )
{
// ADD, SUB, and OR can be ignored if the imm is zero..
case G1Type_ADD:
case G1Type_SUB:
case G1Type_OR:
return true;
// ADC and SBB can never be ignored (could have carry bits)
// XOR behavior is distinct as well [or is it the same as NEG or NOT?]
case G1Type_ADC:
case G1Type_SBB:
case G1Type_XOR:
return false;
// replace AND with XOR (or SUB works too.. whatever!)
case G1Type_AND:
XOR( to, to );
return true;
// replace CMP with OR reg,reg:
case G1Type_CMP:
OR( to, to );
return true;
jNO_DEFAULT
}
return false;
}*/
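A hedged model (illustrative, not emitter code) of the decision table sketched in the commented-out _optimize_imm0 above: only ADD, SUB and OR with a zero immediate can simply be dropped, and even then the skipped flags update is something the real emitter would have to account for via needs_flags.

enum G1Op { Op_ADD, Op_OR, Op_ADC, Op_SBB, Op_AND, Op_SUB, Op_XOR, Op_CMP };

// True when "op reg, 0" leaves the register unchanged and may be skipped.
// ADC/SBB depend on carry, AND needs a rewrite (the result is 0), and
// CMP/XOR still have visible effects of their own.
static bool is_noop_with_zero_imm( G1Op op )
{
	return op == Op_ADD || op == Op_SUB || op == Op_OR;
}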
}
// ------------------------------------------------------------------------
extern const x86Register32 eax;
extern const x86Register32 ebx;
extern const x86Register32 ecx;
extern const x86Register32 edx;
extern const x86Register32 esi;
extern const x86Register32 edi;
extern const x86Register32 ebp;
extern const x86Register32 esp;
extern const x86Register16 ax;
extern const x86Register16 bx;
extern const x86Register16 cx;
extern const x86Register16 dx;
extern const x86Register16 si;
extern const x86Register16 di;
extern const x86Register16 bp;
extern const x86Register16 sp;
extern const x86Register8 al;
extern const x86Register8 cl;
extern const x86Register8 dl;
extern const x86Register8 bl;
extern const x86Register8 ah;
extern const x86Register8 ch;
extern const x86Register8 dh;
extern const x86Register8 bh;
}

#include "ix86_inlines.inl"