diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 65f2f04ab1..a54cefe86a 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -804,10 +804,10 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) using namespace x86Emitter; x86IndexReg thisreg( x86reg ); - if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000); - if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000); - if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000); - if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000); + if ( _X ) iMOV(ptr32[thisreg+offset], 0x00000000); + if ( _Y ) iMOV(ptr32[thisreg+offset+4], 0x00000000); + if ( _Z ) iMOV(ptr32[thisreg+offset+8], 0x00000000); + if ( _W ) iMOV(ptr32[thisreg+offset+12], 0x3f800000); } return; } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 564a636c3e..99ac820221 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -25,6 +25,7 @@ #include "iR5900.h" using namespace vtlb_private; +using namespace x86Emitter; // NOTICE: This function *destroys* EAX!! // Moves 128 bits of memory from the source register ptr to the dest register ptr. @@ -33,22 +34,20 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { // (this is one of my test cases for the new emitter --air) - using namespace x86Emitter; - x86IndexReg src( srcRm ); x86IndexReg dest( destRm ); - MOV( eax, ptr[src] ); - MOV( ptr[dest], eax ); + iMOV( eax, ptr[src] ); + iMOV( ptr[dest], eax ); - MOV( eax, ptr[src+4] ); - MOV( ptr[dest+4], eax ); + iMOV( eax, ptr[src+4] ); + iMOV( ptr[dest+4], eax ); - MOV( eax, ptr[src+8] ); - MOV( ptr[dest+8], eax ); + iMOV( eax, ptr[src+8] ); + iMOV( ptr[dest+8], eax ); - MOV( eax, ptr[src+12] ); - MOV( ptr[dest+12], eax ); + iMOV( eax, ptr[src+12] ); + iMOV( ptr[dest+12], eax ); } /* @@ -166,6 +165,7 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) } } +// ------------------------------------------------------------------------ static void _vtlb_DynGen_IndirectRead( u32 bits ) { int szidx; @@ -188,6 +188,7 @@ static void _vtlb_DynGen_IndirectRead( u32 bits ) CALL32R(EAX); } +// ------------------------------------------------------------------------ // Recompiled input registers: // ecx = source addr to read from // edx = ptr to dest to write to @@ -199,17 +200,18 @@ void vtlb_DynGenRead64(u32 bits) SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - u8* _fullread = JS8(0); + iForwardJS8 _fullread; _vtlb_DynGen_DirectRead( bits, false ); - u8* cont = JMP8(0); + iForwardJump8 cont; - x86SetJ8(_fullread); + _fullread.SetTarget(); + _vtlb_DynGen_IndirectRead( bits ); - - x86SetJ8(cont); + cont.SetTarget(); } +// ------------------------------------------------------------------------ // Recompiled input registers: // ecx - source address to read from // Returns read value in eax. 
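(Annotation: the hunks above swap the old JS8(0)/x86SetJ8() pointer-patching idiom for the new iForwardJump RAII labels: the constructor emits the jump with a placeholder displacement, and SetTarget() back-patches it. A minimal sketch of the pattern, using only entry points introduced by this patch; the surrounding logic is hypothetical, not from the patch:)

    // Skip the subtraction whenever eax is already below 0x100.
    iCMP( eax, 0x100 );      // renamed Group1 compare
    iForwardJB8 inRange;     // emits 'jb rel8' with a zero placeholder
    iSUB( eax, 0x100 );      // runs only when eax >= 0x100
    inRange.SetTarget();     // back-patches rel8 to reach this point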
@@ -221,12 +223,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign) SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - u8* _fullread = JS8(0); + iForwardJS8 _fullread; _vtlb_DynGen_DirectRead( bits, sign ); - u8* cont = JMP8(0); + iForwardJump8 cont; - x86SetJ8(_fullread); + _fullread.SetTarget(); _vtlb_DynGen_IndirectRead( bits ); // perform sign extension on the result: @@ -245,11 +247,10 @@ void vtlb_DynGenRead32(u32 bits, bool sign) else MOVZX32R16toR(EAX,EAX); } - - x86SetJ8(cont); + cont.SetTarget(); } -// +// ------------------------------------------------------------------------ // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) @@ -317,6 +318,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) } } +// ------------------------------------------------------------------------ // Recompiled input registers: // ecx - source address to read from // Returns read value in eax. @@ -449,6 +451,7 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) } } +// ------------------------------------------------------------------------ static void _vtlb_DynGen_IndirectWrite( u32 bits ) { int szidx=0; @@ -468,24 +471,26 @@ static void _vtlb_DynGen_IndirectWrite( u32 bits ) CALL32R(EAX); } +// ------------------------------------------------------------------------ void vtlb_DynGenWrite(u32 sz) { MOV32RtoR(EAX,ECX); SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - u8* _full=JS8(0); + iForwardJS8 _full; _vtlb_DynGen_DirectWrite( sz ); - u8* cont = JMP8(0); + iForwardJump8 cont; - x86SetJ8(_full); + _full.SetTarget(); _vtlb_DynGen_IndirectWrite( sz ); - x86SetJ8(cont); + cont.SetTarget(); } +// ------------------------------------------------------------------------ // Generates code for a store instruction, where the address is a known constant. // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 35bd1791b5..1f4656d778 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -96,22 +96,25 @@ const x86Register8 namespace Internal { - const Group1ImplAll<G1Type_ADD> ADD; - const Group1ImplAll<G1Type_OR> OR; - const Group1ImplAll<G1Type_ADC> ADC; - const Group1ImplAll<G1Type_SBB> SBB; - const Group1ImplAll<G1Type_AND> AND; - const Group1ImplAll<G1Type_SUB> SUB; - const Group1ImplAll<G1Type_XOR> XOR; - const Group1ImplAll<G1Type_CMP> CMP; + const Group1ImplAll<G1Type_ADD> iADD; + const Group1ImplAll<G1Type_OR> iOR; + const Group1ImplAll<G1Type_ADC> iADC; + const Group1ImplAll<G1Type_SBB> iSBB; + const Group1ImplAll<G1Type_AND> iAND; + const Group1ImplAll<G1Type_SUB> iSUB; + const Group1ImplAll<G1Type_XOR> iXOR; + const Group1ImplAll<G1Type_CMP> iCMP; - const Group2ImplAll<G2Type_ROL> ROL; - const Group2ImplAll<G2Type_ROR> ROR; - const Group2ImplAll<G2Type_RCL> RCL; - const Group2ImplAll<G2Type_RCR> RCR; - const Group2ImplAll<G2Type_SHL> SHL; - const Group2ImplAll<G2Type_SHR> SHR; - const Group2ImplAll<G2Type_SAR> SAR; + const Group2ImplAll<G2Type_ROL> iROL; + const Group2ImplAll<G2Type_ROR> iROR; + const Group2ImplAll<G2Type_RCL> iRCL; + const Group2ImplAll<G2Type_RCR> iRCR; + const Group2ImplAll<G2Type_SHL> iSHL; + const Group2ImplAll<G2Type_SHR> iSHR; + const Group2ImplAll<G2Type_SAR> iSAR; + + const MovExtendImplAll<true> iMOVSX; + const MovExtendImplAll<false> iMOVZX; // Performance note: VC++ wants to use byte/word register form for the following // ModRM/SibSB constructors if we use iWrite, and furthermore unrolls the @@ -149,11 +152,9 @@ namespace Internal // instruction can be encoded as ModRm alone.
static __forceinline bool NeedsSibMagic( const ModSibBase& info ) { - // If base register is ESP, then we need a SIB: - if( info.Base.IsStackPointer() ) return true; - // no registers? no sibs! - // (ModSibBase::Reduce + // (ModSibBase::Reduce always places a register in Index, and optionally leaves + // Base empty if only one register is specified) if( info.Index.IsEmpty() ) return false; // A scaled register needs a SIB @@ -188,7 +189,7 @@ namespace Internal if( info.Index.IsEmpty() ) { ModRM( 0, regfield, ModRm_UseDisp32 ); - iWrite<u32>( info.Displacement ); + iWrite<s32>( info.Displacement ); return; } else @@ -211,7 +212,7 @@ namespace Internal { ModRM( 0, regfield, ModRm_UseSib ); SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); - iWrite<u32>( info.Displacement ); + iWrite<s32>( info.Displacement ); return; } else @@ -226,7 +227,7 @@ namespace Internal if( displacement_size != 0 ) { - *(u32*)x86Ptr = info.Displacement; + *(s32*)x86Ptr = info.Displacement; x86Ptr += (displacement_size == 1) ? 1 : 4; } } @@ -234,6 +235,44 @@ namespace Internal using namespace Internal; +// ------------------------------------------------------------------------ +// Assigns the current emitter buffer target address. +// This is provided instead of using x86Ptr directly, since we may in the future find +// a need to change the storage class system for the x86Ptr 'under the hood.' +__emitinline void iSetPtr( void* ptr ) +{ + x86Ptr = (u8*)ptr; +} + +// ------------------------------------------------------------------------ +// Retrieves the current emitter buffer target address. +// This is provided instead of using x86Ptr directly, since we may in the future find +// a need to change the storage class system for the x86Ptr 'under the hood.' +__emitinline u8* iGetPtr() +{ + return x86Ptr; +} + +// ------------------------------------------------------------------------ +__emitinline void iAlignPtr( uint bytes ) +{ + // forward align + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) ); +} + +// ------------------------------------------------------------------------ +__emitinline void iAdvancePtr( uint bytes ) +{ + if( IsDevBuild ) + { + // common debugger courtesy: advance with INT3 as filler. + for( uint i=0; i<bytes; i++ ) iWrite<u8>( 0xcc ); + } + else + x86Ptr += bytes; +} + // ------------------------------------------------------------------------ // Internal implementation of EmitSibMagic which has been custom tailored // to optimize special forms of the Lea instructions accordingly, such @@ -258,21 +297,33 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) if( src.Index.IsEmpty() ) { - MOV( to, src.Displacement ); + iMOV( to, src.Displacement ); return; } else if( displacement_size == 0 ) { - MOV( to, ToReg( src.Index.Id ) ); + iMOV( to, ToReg( src.Index.Id ) ); return; } else { + if( !preserve_flags ) + { + // encode as MOV and ADD combo. Make sure to use the immediate on the + // ADD since it can encode as an 8-bit sign-extended value.
+ + iMOV( to, ToReg( src.Index.Id ) ); + iADD( to, src.Displacement ); + return; + } + else + { + // note: no need to do ebp+0 check since we encode all 0 displacements as + // register assignments above (via MOV) - iWrite<u8>( 0x8d ); - ModRM( displacement_size, to.Id, src.Index.Id ); + iWrite<u8>( 0x8d ); + ModRM( displacement_size, to.Id, src.Index.Id ); + } } } else @@ -288,8 +339,8 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) // (this does not apply to older model P4s with the broken barrel shifter, // but we currently aren't optimizing for that target anyway). - MOV( to, ToReg( src.Index.Id ) ); - SHL( to, src.Scale ); + iMOV( to, ToReg( src.Index.Id ) ); + iSHL( to, src.Scale ); return; } iWrite<u8>( 0x8d ); @@ -300,26 +351,46 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) } else { + if( src.Scale == 0 ) + { + if( !preserve_flags ) + { + if( src.Index == esp ) + { + // ESP is not encodable as an index (ix86 ignores it), thus: + iMOV( to, ToReg( src.Base.Id ) ); // will do the trick! + iADD( to, src.Displacement ); + return; + } + else if( src.Displacement == 0 ) + { + iMOV( to, ToReg( src.Base.Id ) ); + iADD( to, ToReg( src.Index.Id ) ); + return; + } + } + else if( (src.Index == esp) && (src.Displacement == 0) ) + { + // special case handling of ESP as Index, which is replaceable with + // a single MOV even when preserve_flags is set! :D + + iMOV( to, ToReg( src.Base.Id ) ); + return; + } + } + if( src.Base == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! iWrite<u8>( 0x8d ); ModRM( displacement_size, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, src.Base.Id ); - - /*switch( displacement_size ) - { - case 0: break; - case 1: emit.write<s8>( src.Displacement ); break; - case 2: emit.write<s32>( src.Displacement ); break; - jNO_DEFAULT - }*/ } } if( displacement_size != 0 ) { - *(u32*)x86Ptr = src.Displacement; + *(s32*)x86Ptr = src.Displacement; x86Ptr += (displacement_size == 1) ? 1 : 4; } } @@ -350,6 +421,7 @@ protected: static void prefix16() { if( OperandSize == 2 ) iWrite<u8>( 0x66 ); } public: + // ------------------------------------------------------------------------ static __forceinline void Emit( const x86Register<OperandSize>& to, const x86Register<OperandSize>& from ) { if( to == from ) return; // ignore redundant MOVs. @@ -359,6 +431,7 @@ public: ModRM( 3, from.Id, to.Id ); } + // ------------------------------------------------------------------------ static __forceinline void Emit( const ModSibBase& dest, const x86Register<OperandSize>& from ) { prefix16(); @@ -378,6 +451,7 @@ public: } } + // ------------------------------------------------------------------------ static __forceinline void Emit( const x86Register<OperandSize>& to, const ModSibBase& src ) { prefix16(); @@ -397,6 +471,7 @@ public: } } + // ------------------------------------------------------------------------ static __forceinline void Emit( const x86Register<OperandSize>& to, ImmType imm ) { // Note: MOV does not have (reg16/32,imm8) forms. @@ -406,6 +481,7 @@ public: iWrite<ImmType>( imm ); } + // ------------------------------------------------------------------------ static __forceinline void Emit( ModSibStrict<OperandSize> dest, ImmType imm ) { prefix16(); @@ -436,50 +512,50 @@ namespace Internal // TODO : Turn this into a macro after it's been debugged and accuracy-approved!
:D // ---------- 32 Bit Interface ----------- -__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); } -__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); } -__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); } -__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); } -__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } -__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } +__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); } +__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); } +__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); } +__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } +__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } -void MOV( const x86Register32& to, u32 imm, bool preserve_flags ) +void iMOV( const x86Register32& to, u32 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) - XOR( to, to ); + iXOR( to, to ); else MOV32i::Emit( to, imm ); } // ---------- 16 Bit Interface ----------- -__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); } -__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); } -__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); } -__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); } -__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } -__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } +__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); } +__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); } +__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); } +__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } +__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } -void MOV( const x86Register16& to, u16 imm, bool preserve_flags ) +void iMOV( const x86Register16& to, u16 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) - XOR( to, to ); + iXOR( to, to ); else MOV16i::Emit( to, imm ); } // ---------- 8 Bit Interface ----------- -__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } -__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); } -__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); } -__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { 
MOV8::Emit( sibdest, from ); } -__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } -__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } +__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } +__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); } +__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); } +__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } +__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } -void MOV( const x86Register8& to, u8 imm, bool preserve_flags ) +void iMOV( const x86Register8& to, u8 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) - XOR( to, to ); + iXOR( to, to ); else MOV8i::Emit( to, imm ); } @@ -510,7 +586,8 @@ __forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); } __emitinline void POP( const ModSibBase& from ) { - iWrite<u8>( 0x8f ); Internal::EmitSibMagic( 0, from ); + iWrite<u8>( 0x8f ); + Internal::EmitSibMagic( 0, from ); } __forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } @@ -518,7 +595,8 @@ __forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); } __emitinline void PUSH( const ModSibBase& from ) { - iWrite<u8>( 0xff ); Internal::EmitSibMagic( 6, from ); + iWrite<u8>( 0xff ); + Internal::EmitSibMagic( 6, from ); } // pushes the EFLAGS register onto the stack diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp deleted file mode 100644 index f76950c0ef..0000000000 --- a/pcsx2/x86/ix86/ix86_group1.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "PrecompiledHeader.h" -#include "ix86_internal.h" - -//------------------------------------------------------------------ -// x86 Group 1 Instructions -//------------------------------------------------------------------ -// Group 1 instructions all adhere to the same encoding scheme, and so they all -// share the same emitter which has been coded here. - -// Group 1 Table: [column value is the Reg field of the ModRM byte] -// -// 0 1 2 3 4 5 6 7 -// ADD OR ADC SBB AND SUB XOR CMP -// - -namespace x86Emitter { - -////////////////////////////////////////////////////////////////////////////////////////// -// x86RegConverter - this class is used internally by the emitter as a helper for -// converting 8 and 16 bit register forms into 32 bit forms.
This way the end-user exposed API -// can use type-safe 8/16/32 bit register types, and the underlying code can use a single -// unified emitter to generate all function variations + prefixes and such. :) -// -class x86RegConverter : public x86Register32 -{ -public: - x86RegConverter( x86Register32 src ) : x86Register32( src ) {} - x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {} - x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {} -}; - -enum Group1InstructionType -{ - G1Type_ADD=0, - G1Type_OR, - G1Type_ADC, - G1Type_SBB, - G1Type_AND, - G1Type_SUB, - G1Type_XOR, - G1Type_CMP -}; - - -static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false ) -{ - write8( (bit8form ? 0 : 1) | (inst<<3) ); - ModRM( 3, from.Id, to.Id ); -} - -static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false ) -{ - write8( (bit8form ? 0 : 1) | (inst<<3) ); - EmitSibMagic( from, sibdest ); -} - -static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false ) -{ - write8( (bit8form ? 2 : 3) | (inst<<3) ); - EmitSibMagic( to, sibsrc ); -} - -// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit -// instruction (AX,BX,etc). -template< typename T > -static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm ) -{ - bool bit8form = (sizeof(T) == 1); - - if( !bit8form && is_s8( imm ) ) - { - write8( 0x83 ); - ModRM( 3, inst, to.Id ); - write8( (s8)imm ); - } - else - { - if( to == eax ) - write8( (bit8form ? 4 : 5) | (inst<<3) ); - else - { - write8( bit8form ? 0x80 : 0x81 ); - ModRM( 3, inst, to.Id ); - } - x86write( imm ); - } -} - -// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit -// instruction (AX,BX,etc). -template< typename T > -static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) -{ - bool bit8form = (sizeof(T) == 1); - - write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) ); - - EmitSibMagic( inst, sibdest ); - - if( !bit8form && is_s8( imm ) ) - write8( (s8)imm ); - else - x86write( imm ); -} - -// 16 bit instruction prefix! 
-static __forceinline void prefix16() { write8(0x66); } - -////////////////////////////////////////////////////////////////////////////////////////// -// -#define DEFINE_GROUP1_OPCODE( cod ) \ - emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - \ - emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ - \ - emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ - emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \ - emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \ - emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ - emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \ - emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } - -DEFINE_GROUP1_OPCODE( ADD ) -DEFINE_GROUP1_OPCODE( CMP ) -DEFINE_GROUP1_OPCODE( OR ) -DEFINE_GROUP1_OPCODE( ADC ) -DEFINE_GROUP1_OPCODE( SBB ) -DEFINE_GROUP1_OPCODE( AND ) -DEFINE_GROUP1_OPCODE( SUB ) -DEFINE_GROUP1_OPCODE( XOR ) - -} // end namespace x86Emitter - - -static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src ) -{ - return x86Emitter::x86Register32( src ); -} - -static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) -{ - return x86Emitter::x86Register16( src ); -} - -static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src ) -{ - return x86Emitter::x86Register8( src ); -} - -static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src ) -{ - return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) ); -} - 
-////////////////////////////////////////////////////////////////////////////////////////// -// -#define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \ - emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); } -#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \ - DEFINE_LEGACY_HELPER( cod, 32 ) \ - DEFINE_LEGACY_HELPER( cod, 16 ) \ - DEFINE_LEGACY_HELPER( cod, 8 ) -DEFINE_GROUP1_OPCODE_LEGACY( ADD ) -DEFINE_GROUP1_OPCODE_LEGACY( CMP ) -DEFINE_GROUP1_OPCODE_LEGACY( OR ) -DEFINE_GROUP1_OPCODE_LEGACY( ADC ) -DEFINE_GROUP1_OPCODE_LEGACY( SBB ) -DEFINE_GROUP1_OPCODE_LEGACY( AND ) -DEFINE_GROUP1_OPCODE_LEGACY( SUB ) -DEFINE_GROUP1_OPCODE_LEGACY( XOR ) - -// Special forms needed by the legacy emitter syntax: - -emitterT void AND32I8toR( x86IntRegType to, s8 from ) -{ - x86Emitter::AND32( _reghlp32(to), from ); -} - -emitterT void AND32I8toM( uptr to, s8 from ) -{ - x86Emitter::AND32( (void*)to, from ); -} diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index 69cc3b03f5..3fe2ec54fb 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -234,4 +234,58 @@ namespace x86Emitter return *this; } + + ////////////////////////////////////////////////////////////////////////////////////////// + // + + // ------------------------------------------------------------------------ + template< typename OperandType > + iForwardJump<OperandType>::iForwardJump( JccComparisonType cctype ) : + BasePtr( (s8*)iGetPtr() + + ((OperandSize == 1) ? 2 : // j8's are always 2 bytes. + ((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes + ) + { + jASSUME( cctype != Jcc_Unknown ); + jASSUME( OperandSize == 1 || OperandSize == 4 ); + + if( OperandSize == 1 ) + iWrite<u8>( (cctype == Jcc_Unconditional) ?
0xeb : (0x70 | cctype) ); + else + { + if( cctype == Jcc_Unconditional ) + iWrite<u8>( 0xe9 ); + else + { + iWrite<u8>( 0x0f ); + iWrite<u8>( 0x80 | cctype ); + } + } + + iAdvancePtr( OperandSize ); + } + + // ------------------------------------------------------------------------ + template< typename OperandType > + void iForwardJump<OperandType>::SetTarget() const + { + jASSUME( BasePtr != NULL ); + + sptr displacement = (sptr)iGetPtr() - (sptr)BasePtr; + if( OperandSize == 1 ) + { + if( !is_s8( displacement ) ) + { + assert( false ); + Console::Error( "Emitter Error: Invalid short jump displacement = 0x%x", params (int)displacement ); + } + BasePtr[-1] = (s8)displacement; + } + else + { + // full displacement, no sanity checks needed :D + ((s32*)BasePtr)[-1] = displacement; + } + } + } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 0f218d1a0b..48aa07808e 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -35,6 +35,9 @@ namespace x86Emitter { + extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false ); + + // ----- Lea Instructions (Load Effective Address) ----- // Note: alternate (void*) forms of these instructions are not provided since those // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs @@ -56,56 +59,153 @@ namespace x86Emitter static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } // ------------------------------------------------------------------------ - using Internal::ADD; - using Internal::OR; - using Internal::ADC; - using Internal::SBB; - using Internal::AND; - using Internal::SUB; - using Internal::XOR; - using Internal::CMP; + using Internal::iADD; + using Internal::iOR; + using Internal::iADC; + using Internal::iSBB; + using Internal::iAND; + using Internal::iSUB; + using Internal::iXOR; + using Internal::iCMP; - using Internal::ROL; - using Internal::ROR; - using Internal::RCL; - using Internal::RCR; - using Internal::SHL; - using Internal::SHR; - using Internal::SAR; + using Internal::iROL; + using Internal::iROR; + using Internal::iRCL; + using Internal::iRCR; + using Internal::iSHL; + using Internal::iSHR; + using Internal::iSAR; + using Internal::iMOVSX; + using Internal::iMOVZX; + + ////////////////////////////////////////////////////////////////////////////////////////// + // MOV instructions! // ---------- 32 Bit Interface ----------- - extern void MOV( const x86Register32& to, const x86Register32& from ); - extern void MOV( const ModSibBase& sibdest, const x86Register32& from ); - extern void MOV( const x86Register32& to, const ModSibBase& sibsrc ); - extern void MOV( const x86Register32& to, const void* src ); - extern void MOV( const void* dest, const x86Register32& from ); + extern void iMOV( const x86Register32& to, const x86Register32& from ); + extern void iMOV( const ModSibBase& sibdest, const x86Register32& from ); + extern void iMOV( const x86Register32& to, const ModSibBase& sibsrc ); + extern void iMOV( const x86Register32& to, const void* src ); + extern void iMOV( const void* dest, const x86Register32& from ); // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor).
- extern void MOV( const x86Register32& to, u32 imm, bool preserve_flags=false ); - extern void MOV( const ModSibStrict<4>& sibdest, u32 imm ); + extern void iMOV( const x86Register32& to, u32 imm, bool preserve_flags=false ); + extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm ); // ---------- 16 Bit Interface ----------- - extern void MOV( const x86Register16& to, const x86Register16& from ); - extern void MOV( const ModSibBase& sibdest, const x86Register16& from ); - extern void MOV( const x86Register16& to, const ModSibBase& sibsrc ); - extern void MOV( const x86Register16& to, const void* src ); - extern void MOV( const void* dest, const x86Register16& from ); + extern void iMOV( const x86Register16& to, const x86Register16& from ); + extern void iMOV( const ModSibBase& sibdest, const x86Register16& from ); + extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc ); + extern void iMOV( const x86Register16& to, const void* src ); + extern void iMOV( const void* dest, const x86Register16& from ); // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor). - extern void MOV( const x86Register16& to, u16 imm, bool preserve_flags=false ); - extern void MOV( const ModSibStrict<2>& sibdest, u16 imm ); + extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false ); + extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm ); // ---------- 8 Bit Interface ----------- - extern void MOV( const x86Register8& to, const x86Register8& from ); - extern void MOV( const ModSibBase& sibdest, const x86Register8& from ); - extern void MOV( const x86Register8& to, const ModSibBase& sibsrc ); - extern void MOV( const x86Register8& to, const void* src ); - extern void MOV( const void* dest, const x86Register8& from ); + extern void iMOV( const x86Register8& to, const x86Register8& from ); + extern void iMOV( const ModSibBase& sibdest, const x86Register8& from ); + extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc ); + extern void iMOV( const x86Register8& to, const void* src ); + extern void iMOV( const void* dest, const x86Register8& from ); - extern void MOV( const x86Register8& to, u8 imm, bool preserve_flags=false ); - extern void MOV( const ModSibStrict<1>& sibdest, u8 imm ); + extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false ); + extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm ); + ////////////////////////////////////////////////////////////////////////////////////////// + // JMP / Jcc Instructions! + +#define DEFINE_FORWARD_JUMP( label, cond ) \ + template< typename OperandType > \ + class iForward##label : public iForwardJump<OperandType> \ + { \ + public: \ + iForward##label() : iForwardJump<OperandType>( cond ) {} \ + }; + + // ------------------------------------------------------------------------ + // Note: typedefs below are defined individually in order to appease Intellisense + // resolution. Including them into the class definition macro above breaks it.
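(Annotation: an illustrative expansion, not part of the patch. DEFINE_FORWARD_JUMP( JZ, Jcc_Zero ) produces roughly the following class, which the typedefs below then instantiate for s8 and s32 operand sizes:)

    template< typename OperandType >
    class iForwardJZ : public iForwardJump<OperandType>
    {
    public:
        iForwardJZ() : iForwardJump<OperandType>( Jcc_Zero ) {}
    };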
+ + typedef iForwardJump<s8> iForwardJump8; + typedef iForwardJump<s32> iForwardJump32; + + + DEFINE_FORWARD_JUMP( JA, Jcc_Above ); + DEFINE_FORWARD_JUMP( JB, Jcc_Below ); + DEFINE_FORWARD_JUMP( JAE, Jcc_AboveOrEqual ); + DEFINE_FORWARD_JUMP( JBE, Jcc_BelowOrEqual ); + + typedef iForwardJA<s8> iForwardJA8; + typedef iForwardJA<s32> iForwardJA32; + typedef iForwardJB<s8> iForwardJB8; + typedef iForwardJB<s32> iForwardJB32; + typedef iForwardJAE<s8> iForwardJAE8; + typedef iForwardJAE<s32> iForwardJAE32; + typedef iForwardJBE<s8> iForwardJBE8; + typedef iForwardJBE<s32> iForwardJBE32; + + DEFINE_FORWARD_JUMP( JG, Jcc_Greater ); + DEFINE_FORWARD_JUMP( JL, Jcc_Less ); + DEFINE_FORWARD_JUMP( JGE, Jcc_GreaterOrEqual ); + DEFINE_FORWARD_JUMP( JLE, Jcc_LessOrEqual ); + + typedef iForwardJG<s8> iForwardJG8; + typedef iForwardJG<s32> iForwardJG32; + typedef iForwardJL<s8> iForwardJL8; + typedef iForwardJL<s32> iForwardJL32; + typedef iForwardJGE<s8> iForwardJGE8; + typedef iForwardJGE<s32> iForwardJGE32; + typedef iForwardJLE<s8> iForwardJLE8; + typedef iForwardJLE<s32> iForwardJLE32; + + DEFINE_FORWARD_JUMP( JZ, Jcc_Zero ); + DEFINE_FORWARD_JUMP( JE, Jcc_Equal ); + DEFINE_FORWARD_JUMP( JNZ, Jcc_NotZero ); + DEFINE_FORWARD_JUMP( JNE, Jcc_NotEqual ); + + typedef iForwardJZ<s8> iForwardJZ8; + typedef iForwardJZ<s32> iForwardJZ32; + typedef iForwardJE<s8> iForwardJE8; + typedef iForwardJE<s32> iForwardJE32; + typedef iForwardJNZ<s8> iForwardJNZ8; + typedef iForwardJNZ<s32> iForwardJNZ32; + typedef iForwardJNE<s8> iForwardJNE8; + typedef iForwardJNE<s32> iForwardJNE32; + + DEFINE_FORWARD_JUMP( JS, Jcc_Signed ); + DEFINE_FORWARD_JUMP( JNS, Jcc_Unsigned ); + + typedef iForwardJS<s8> iForwardJS8; + typedef iForwardJS<s32> iForwardJS32; + typedef iForwardJNS<s8> iForwardJNS8; + typedef iForwardJNS<s32> iForwardJNS32; + + DEFINE_FORWARD_JUMP( JO, Jcc_Overflow ); + DEFINE_FORWARD_JUMP( JNO, Jcc_NotOverflow ); + + typedef iForwardJO<s8> iForwardJO8; + typedef iForwardJO<s32> iForwardJO32; + typedef iForwardJNO<s8> iForwardJNO8; + typedef iForwardJNO<s32> iForwardJNO32; + + DEFINE_FORWARD_JUMP( JC, Jcc_Carry ); + DEFINE_FORWARD_JUMP( JNC, Jcc_NotCarry ); + + typedef iForwardJC<s8> iForwardJC8; + typedef iForwardJC<s32> iForwardJC32; + typedef iForwardJNC<s8> iForwardJNC8; + typedef iForwardJNC<s32> iForwardJNC32; + + DEFINE_FORWARD_JUMP( JPE, Jcc_ParityEven ); + DEFINE_FORWARD_JUMP( JPO, Jcc_ParityOdd ); + + typedef iForwardJPE<s8> iForwardJPE8; + typedef iForwardJPE<s32> iForwardJPE32; + typedef iForwardJPO<s8> iForwardJPO8; + typedef iForwardJPO<s32> iForwardJPO32; } diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index 4b2c11eb17..20fde0d093 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -36,157 +36,83 @@ #include "System.h" #include "ix86_internal.h" +namespace x86Emitter { -// Another Work-in-Progress!! - - -/* -emitterT void x86SetPtr( u8* ptr ) +// ------------------------------------------------------------------------ +void iSmartJump::SetTarget() { - x86Ptr = ptr; + jASSUME( !m_written ); + if( m_written ) + throw Exception::InvalidOperation( "Attempted to set SmartJump label multiple times." ); + + m_target = iGetPtr(); + if( m_baseptr == NULL ) return; + + iSetPtr( m_baseptr ); + u8* const saveme = m_baseptr + GetMaxInstructionSize(); + iJccKnownTarget( m_cc, m_target, true ); + + // Copy recompiled data inward if the jump instruction didn't fill the + // allotted buffer (means that we optimized things to a j8!)
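(Annotation: a worked example of the copy-inward logic that follows. A conditional iSmartJump reserves GetMaxInstructionSize() == 6 bytes; if iJccKnownTarget() manages to emit a 2-byte j8 instead, saveme sits 4 bytes past the new write position, so spacer == 4. The instructions emitted between the placeholder and the label are then shifted back by those 4 bytes, and x86Ptr is rewound to match.)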
+ + const int spacer = (sptr)saveme - (sptr)iGetPtr(); + if( spacer != 0 ) + { + u8* destpos = iGetPtr(); + const int copylen = (sptr)m_target - (sptr)saveme; + + memcpy_fast( destpos, saveme, copylen ); + iSetPtr( m_target - spacer ); + } + + m_written = true; } ////////////////////////////////////////////////////////////////////////////////////////// -// x86Ptr Label API // -class x86Label +// ------------------------------------------------------------------------ +// Writes a jump at the current x86Ptr, which targets a pre-established target address. +// (usually a backwards jump) +// +// slideForward - used internally by iSmartJump to indicate that the jump target is going +// to slide forward in the event of an 8 bit displacement. +// +__emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) { -public: - class Entry - { - protected: - u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type) - u8* m_base; // base address of the instruction (passed to the instruction) - int m_cc; // comparison type of the instruction - - public: - explicit Entry( int cc ) : - m_base( x86Ptr ) - , m_writebackpos( writebackidx ) - { - } + // Calculate the potential j8 displacement first, assuming an instruction length of 2: + sptr displacement8 = (sptr)target - ((sptr)iGetPtr() + 2); - void Commit( const u8* target ) const - { - //uptr reltarget = (uptr)m_base - (uptr)target; - //*((u32*)&m_base[m_writebackpos]) = reltarget; - jASSUME( m_emit != NULL ); - jASSUME( m_base != NULL ); - return m_emit( m_base, target, m_cc ); - } - }; + const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0; + displacement8 -= slideVal; -protected: - u8* m_target; // x86Ptr target address of this label - Entry m_writebacks[8]; - int m_writeback_curpos; - -public: - // creates a label list with no valid target. - // Use x86LabelList::Set() to set a target prior to class destruction. - x86Label() : m_target() - { - } - - x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() ) - { - } + // if the following assert fails it means we accidentally used slideForward on a backward + // jump (which is an invalid operation since there's nothing to slide forward). + if( slideForward ) jASSUME( displacement8 >= 0 ); - // Performs all address writebacks on destruction. - virtual ~x86Label() + if( is_s8( displacement8 ) ) { - IssueWritebacks(); + iWrite<u8>( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); + iWrite<s8>( displacement8 ); } + else + { + // Perform a 32 bit jump instead. :( - void SetTarget() { m_address = x86Ptr; } - void SetTarget( void* addr ) { m_address = (u8*)addr; } - - void Clear() - { - m_writeback_curpos = 0; - } - - // Adds a jump/call instruction to this label for writebacks. - void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc ) - { - jASSUME( m_writeback_curpos < MaxWritebacks ); - m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) ); - m_writeback_curpos++; - } - - void IssueWritebacks() const - { - const std::list<Entry>::const_iterator& start = m_list_writebacks. - for( ; start!=end; start++ ) + if( comparison == Jcc_Unconditional ) + iWrite<u8>( 0xe9 ); + else { - Entry& current = *start; - u8* donespot = current.Commit(); - - // Copy the data from the m_nextinst to the current location, - // and update any additional writebacks (but what about multiple labels?!?)
- + iWrite<u8>( 0x0f ); + iWrite<u8>( 0x80 | comparison ); } + iWrite<s32>( (sptr)target - ((sptr)iGetPtr() + 4) ); } } -}; -#endif - -void JMP( x86Label& dest ) -{ - dest.AddWriteback( x86Ptr, emitJMP, 0 ); -} -void JLE( x86Label& dest ) +__emitinline void iJcc( JccComparisonType comparison, void* target ) { - dest.AddWriteback( x86Ptr, emitJCC, 0 ); + iJccKnownTarget( comparison, target ); } -void x86SetJ8( u8* j8 ) -{ - u32 jump = ( x86Ptr - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - *j8 = (u8)jump; -} - -void x86SetJ8A( u8* j8 ) -{ - u32 jump = ( x86Ptr - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - - if( ((uptr)x86Ptr&0xf) > 4 ) { - - uptr newjump = jump + 16-((uptr)x86Ptr&0xf); - - if( newjump <= 0x7f ) { - jump = newjump; - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - } - } - *j8 = (u8)jump; -} - -emitterT void x86SetJ32( u32* j32 ) -{ - *j32 = ( x86Ptr - (u8*)j32 ) - 4; -} - -emitterT void x86SetJ32A( u32* j32 ) -{ - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - x86SetJ32(j32); -} - -emitterT void x86Align( int bytes ) -{ - // forward align - x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); -} -*/ +} \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 722aba5089..f89443112c 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -57,27 +57,29 @@ static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86In return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); } +////////////////////////////////////////////////////////////////////////////////////////// +// #define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp<bits/8>(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp<bits/8>(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp<bits/8>(from) ); } \ - emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \ - emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp<bits/8>(to), _reghlp<bits/8>(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp<bits/8>(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp<bits/8>(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp<bits/8>(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { i##cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp<bits/8>(to), _mhlp<bits/8>(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, _reghlp<bits/8>(from) ); } \ emitterT void
cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ - { cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \ + { i##cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \ emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ - { cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); } + { i##cod( _reghlp<bits/8>(to), _mhlp2<bits/8>(from1,from2) + offset ); } #define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ - emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp<bits/8>(to), cl ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp<bits/8>(to), imm ); } \ - emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \ - emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp<bits/8>(to) + offset, imm ); } \ - emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp<bits/8>(to) + offset, cl ); } + emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp<bits/8>(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp<bits/8>(to), imm ); } \ + emitterT void cod##bits##CLtoM( uptr to ) { i##cod( ptr##bits[to], cl ); } \ + emitterT void cod##bits##ItoM( uptr to, u8 imm ) { i##cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp<bits/8>(to) + offset, cl ); } //emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ // { cod( _mhlp2<bits/8>(to1,to2) + offset, _reghlp<bits/8>(from) ); } \ @@ -113,32 +115,44 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR ) DEFINE_OPCODE_LEGACY( MOV ) +// ------------------------------------------------------------------------ +#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \ + emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \ + emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ + emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); } +DEFINE_LEGACY_MOVEXTEND( S, 16 ) +DEFINE_LEGACY_MOVEXTEND( Z, 16 ) +DEFINE_LEGACY_MOVEXTEND( S, 8 ) +DEFINE_LEGACY_MOVEXTEND( Z, 8 ) + + // mov r32 to [r32<<2+const] ... emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - AND( _reghlp<4>(to), from ); + iAND( _reghlp<4>(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) { - AND( ptr8[to], from ); + iAND( ptr8[to], from ); } @@ -310,103 +324,6 @@ emitterT void NOP( void ) write8(0x90); } - -/* movsx r8 to r32 */ -emitterT void MOVSX32R8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 3, to, from ); -} - -emitterT void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movsx m8 to r32 */ -emitterT void MOVSX32M8toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xBE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movsx r16 to r32 */ -emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 3, to, from ); -} - -emitterT void MOVSX32Rm16toR( x86IntRegType to,
x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movsx m16 to r32 */ -emitterT void MOVSX32M16toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xBF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movzx r8 to r32 */ -emitterT void MOVZX32R8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 3, to, from ); -} - -emitterT void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movzx m8 to r32 */ -emitterT void MOVZX32M8toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xB60F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movzx r16 to r32 */ -emitterT void MOVZX32R16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 3, to, from ); -} - -emitterT void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movzx m16 to r32 */ -emitterT void MOVZX32M16toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xB70F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - /* cmovbe r32 to r32 */ emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) { diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index b1497dd139..bba16e87ed 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -18,9 +18,8 @@ #pragma once -extern void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs +extern void cpudetectInit();//this is all that needs to be called and will fill up the below structs -typedef struct CAPABILITIES CAPABILITIES; //cpu capabilities structure struct CAPABILITIES { u32 hasFloatingPointUnit; @@ -137,9 +136,9 @@ namespace x86Emitter // single-line functions anyway. // #ifdef PCSX2_DEVBUILD -#define __emitinline +# define __emitinline #else -#define __emitinline __forceinline +# define __emitinline __forceinline #endif #ifdef _MSC_VER @@ -148,13 +147,18 @@ namespace x86Emitter # define __noinline #endif - static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) class x86AddressInfo; class ModSibBase; + extern void iSetPtr( void* ptr ); + extern u8* iGetPtr(); + extern void iAlignPtr( uint bytes ); + extern void iAdvancePtr( uint bytes ); + + static __forceinline void write8( u8 val ) { iWrite( val ); @@ -195,7 +199,7 @@ namespace x86Emitter x86Register(): Id( -1 ) {} explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } - bool IsEmpty() const { return Id == -1; } + bool IsEmpty() const { return Id < 0; } // Returns true if the register is a valid accumulator: Eax, Ax, Al. bool IsAccumulator() const { return Id == 0; } @@ -220,7 +224,7 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which // means it finds undeclared variables when MSVC does not (Since MSVC compiles templates - // when they are actually used). In practice this sucks since it means we have to move all' + // when they are actually used). 
In practice this sucks since it means we have to move all + // our variable and function prototypes from a nicely/neatly unified location to being strewn + // all about the templated code in haphazard fashion. Yay.. >_< // @@ -476,6 +480,118 @@ namespace x86Emitter extern const x86IndexerTypeExplicit<2> ptr16; extern const x86IndexerTypeExplicit<1> ptr8; + ////////////////////////////////////////////////////////////////////////////////////////// + // JccComparisonType - enumerated possibilities for inspired code branching! + // + enum JccComparisonType + { + Jcc_Unknown = -2, + Jcc_Unconditional = -1, + Jcc_Overflow = 0x0, + Jcc_NotOverflow = 0x1, + Jcc_Below = 0x2, + Jcc_Carry = 0x2, + Jcc_AboveOrEqual = 0x3, + Jcc_NotCarry = 0x3, + Jcc_Zero = 0x4, + Jcc_Equal = 0x4, + Jcc_NotZero = 0x5, + Jcc_NotEqual = 0x5, + Jcc_BelowOrEqual = 0x6, + Jcc_Above = 0x7, + Jcc_Signed = 0x8, + Jcc_Unsigned = 0x9, + Jcc_ParityEven = 0xa, + Jcc_ParityOdd = 0xb, + Jcc_Less = 0xc, + Jcc_GreaterOrEqual = 0xd, + Jcc_LessOrEqual = 0xe, + Jcc_Greater = 0xf, + }; + + // Not supported yet: + //E3 cb JECXZ rel8 Jump short if ECX register is 0. + + + ////////////////////////////////////////////////////////////////////////////////////////// + // iSmartJump + // This class provides an interface for generating forward-based j8's or j32's "smartly" + // as per the measured displacement distance. If the displacement is a valid s8, then + // a j8 is inserted, else a j32. + // + // Performance Analysis: j8's use 4 fewer bytes per opcode, and thus can provide + // minor speed benefits in the form of reduced L1/L2 cache clutter. They're also notably faster + // on P4's, and mildly faster on AMDs. (Core2's and i7's don't care) + // + class iSmartJump + { + protected: + u8* m_target; // x86Ptr target address of this label + u8* m_baseptr; // base address of the instruction (passed to the instruction emitter) + JccComparisonType m_cc; // comparison type of the instruction + bool m_written; // set true when the jump is written (at which point the object becomes invalid) + + public: + + const int GetMaxInstructionSize() const + { + jASSUME( m_cc != Jcc_Unknown ); + return ( m_cc == Jcc_Unconditional ) ? 5 : 6; + } + + // Creates a backward jump label which will be passed into a Jxx instruction (or few!) + // later on, and the current x86Ptr is recorded as the target [thus making the class + // creation point the jump target]. + iSmartJump() + { + m_target = iGetPtr(); + m_baseptr = NULL; + m_cc = Jcc_Unknown; + m_written = false; + } + + // ccType - Comparison type to be written back to the jump instruction position. + // + iSmartJump( JccComparisonType ccType ) + { + jASSUME( ccType != Jcc_Unknown ); + m_target = NULL; + m_baseptr = iGetPtr(); + m_cc = ccType; + m_written = false; + iAdvancePtr( GetMaxInstructionSize() ); + } + + JccComparisonType GetCondition() const + { + return m_cc; + } + + u8* GetTarget() const + { + return m_target; + } + + void SetTarget(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + template< typename OperandType > + class iForwardJump + { + public: + static const uint OperandSize = sizeof( OperandType ); + + // pointer to base of the instruction *Following* the jump. The jump address will be + // relative to this address.
+ s8* const BasePtr; + + public: + iForwardJump( JccComparisonType cctype = Jcc_Unconditional ); + void SetTarget() const; + }; + ////////////////////////////////////////////////////////////////////////////////////////// // namespace Internal @@ -678,13 +794,6 @@ namespace x86Emitter } }; - // if the immediate is zero, we can replace the instruction, or ignore it - // entirely, depending on the instruction being issued. That's what we do here. - // (returns FALSE if no optimization is performed) - // [TODO] : Work-in-progress! - //template< G1Type InstType, typename RegType > - //static __forceinline void _optimize_imm0( RegType to ); - // ------------------------------------------------------------------- // template< G1Type InstType > @@ -789,7 +898,6 @@ namespace x86Emitter __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } - Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. }; @@ -799,22 +907,85 @@ namespace x86Emitter // importing Internal into x86Emitter, which, done at the header file level, would defeat // the purpose!) - extern const Group1ImplAll<G1Type_ADD> ADD; - extern const Group1ImplAll<G1Type_OR> OR; - extern const Group1ImplAll<G1Type_ADC> ADC; - extern const Group1ImplAll<G1Type_SBB> SBB; - extern const Group1ImplAll<G1Type_AND> AND; - extern const Group1ImplAll<G1Type_SUB> SUB; - extern const Group1ImplAll<G1Type_XOR> XOR; - extern const Group1ImplAll<G1Type_CMP> CMP; + extern const Group1ImplAll<G1Type_ADD> iADD; + extern const Group1ImplAll<G1Type_OR> iOR; + extern const Group1ImplAll<G1Type_ADC> iADC; + extern const Group1ImplAll<G1Type_SBB> iSBB; + extern const Group1ImplAll<G1Type_AND> iAND; + extern const Group1ImplAll<G1Type_SUB> iSUB; + extern const Group1ImplAll<G1Type_XOR> iXOR; + extern const Group1ImplAll<G1Type_CMP> iCMP; - extern const Group2ImplAll<G2Type_ROL> ROL; - extern const Group2ImplAll<G2Type_ROR> ROR; - extern const Group2ImplAll<G2Type_RCL> RCL; - extern const Group2ImplAll<G2Type_RCR> RCR; - extern const Group2ImplAll<G2Type_SHL> SHL; - extern const Group2ImplAll<G2Type_SHR> SHR; - extern const Group2ImplAll<G2Type_SAR> SAR; + extern const Group2ImplAll<G2Type_ROL> iROL; + extern const Group2ImplAll<G2Type_ROR> iROR; + extern const Group2ImplAll<G2Type_RCL> iRCL; + extern const Group2ImplAll<G2Type_RCR> iRCR; + extern const Group2ImplAll<G2Type_SHL> iSHL; + extern const Group2ImplAll<G2Type_SHR> iSHR; + extern const Group2ImplAll<G2Type_SAR> iSAR; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Mov with sign/zero extension implementations: + // + template< int DestOperandSize, int SrcOperandSize > + class MovExtendImpl + { + protected: + static bool Is8BitOperand() { return SrcOperandSize == 1; } + static void prefix16() { if( DestOperandSize == 2 ) iWrite<u8>( 0x66 ); } + static __forceinline void emit_base( bool SignExtend ) + { + prefix16(); + iWrite<u8>( 0x0f ); + iWrite<u8>( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); + } + + public: + MovExtendImpl() {} // For the love of GCC.
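(Annotation: the opcode arithmetic in emit_base above resolves to the standard 0F-prefixed move-extend forms: 0xb6 = movzx r32,rm8; 0xb7 = movzx r32,rm16; 0xbe = movsx r32,rm8; 0xbf = movsx r32,rm16. The low bit selects the 16-bit source, and bit 3 selects sign extension.)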
+ + static __emitinline void Emit( const x86Register<DestOperandSize>& to, const x86Register<SrcOperandSize>& from, bool SignExtend ) + { + emit_base( SignExtend ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const x86Register<DestOperandSize>& to, const ModSibStrict<SrcOperandSize>& sibsrc, bool SignExtend ) + { + emit_base( SignExtend ); + EmitSibMagic( to.Id, sibsrc ); + } + }; + + // ------------------------------------------------------------------------ + template< bool SignExtend > + class MovExtendImplAll + { + protected: + typedef MovExtendImpl<4, 2> m_16to32; + typedef MovExtendImpl<4, 1> m_8to32; + + public: + __forceinline void operator()( const x86Register32& to, const x86Register16& from ) const { m_16to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } + + __forceinline void operator()( const x86Register32& to, const x86Register8& from ) const { m_8to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } + + MovExtendImplAll() {} // don't ask. + }; + + // ------------------------------------------------------------------------ + + extern const MovExtendImplAll<true> iMOVSX; + extern const MovExtendImplAll<false> iMOVZX; + + + // if the immediate is zero, we can replace the instruction, or ignore it + // entirely, depending on the instruction being issued. That's what we do here. + // (returns FALSE if no optimization is performed) + // [TODO] : Work-in-progress! + //template< G1Type InstType, typename RegType > + //static __forceinline void _optimize_imm0( RegType to ); /*template< G1Type InstType, typename RegType > static __forceinline void _optimize_imm0( const RegType& to ) { switch( InstType ) { // ADD, SUB, and OR can be ignored if the imm is zero.. - case G1Type_ADD: - case G1Type_SUB: - case G1Type_OR: - return true; + case G1Type_ADD: + case G1Type_SUB: + case G1Type_OR: + return true; // ADC and SBB can never be ignored (could have carry bits) // XOR behavior is distinct as well [or is it the same as NEG or NOT?] - case G1Type_ADC: - case G1Type_SBB: - case G1Type_XOR: - return false; + case G1Type_ADC: + case G1Type_SBB: + case G1Type_XOR: + return false; // replace AND with XOR (or SUB works too.. whatever!) - case G1Type_AND: - XOR( to, to ); + case G1Type_AND: + iXOR( to, to ); return true; // replace CMP with OR reg,reg: - case G1Type_CMP: - OR( to, to ); + case G1Type_CMP: + iOR( to, to ); return true; jNO_DEFAULT
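(Annotation: taken together, the renames give the emitter a uniform i-prefixed, overload-driven surface. An illustrative closing sequence follows; this is hypothetical code, not part of the patch, and it assumes the ptr32 indexer and register globals declared above:)

    x86IndexReg base( EBX );       // wrap a legacy register id
    iMOV( eax, ptr32[base+8] );    // mov eax, [ebx+8]
    iSHL( eax, 2 );                // Group2 shift by immediate
    iADD( eax, 4 );                // Group1 add, imm8 sign-extended form
    iMOV( ptr32[base+8], eax );    // store the result back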