diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index b181e8ac48..8843863ba8 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -41,24 +41,9 @@ # define ArraySize(x) (sizeof(x)/sizeof((x)[0])) #endif -////////////////////////////////////////////////////////////////////////////////////////// -// __releaseinline -- a forceinline macro that is enabled for RELEASE/PUBLIC builds ONLY. -// This is useful because forceinline can make certain types of debugging problematic since -// functions that look like they should be called won't breakpoint since their code is -// inlined, and it can make stack traces confusing or near useless. -// -// Use __releaseinline for things which are generally large functions where trace debugging -// from Devel builds is likely useful; but which should be inlined in an optimized Release -// environment. -// -#ifdef PCSX2_DEVBUILD -# define __releaseinline -#else -# define __releaseinline __forceinline -#endif - -////////////////////////////////////////////////////////////////////////////////////////// -// jASSUME - give hints to the optimizer +// -------------------------------------------------------------------------------------- +// jASSUME - give hints to the optimizer [obsolete, use pxAssume() instead] +// -------------------------------------------------------------------------------------- // This is primarily useful for the default case switch optimizer, which enables VC to // generate more compact switches. // @@ -83,7 +68,9 @@ # endif #endif -////////////////////////////////////////////////////////////////////////////////////////// +// -------------------------------------------------------------------------------------- +// C_ASSERT +// -------------------------------------------------------------------------------------- // compile-time assertion; usable at static variable define level. 
// (typically used to confirm the correct sizeof() for struct types where size // restaints must be enforced). @@ -92,9 +79,9 @@ # define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1] #endif -////////////////////////////////////////////////////////////////////////////////////////// +// -------------------------------------------------------------------------------------- // Dev / Debug conditionals - Consts for using if() statements instead of uglier #ifdef. -// +// -------------------------------------------------------------------------------------- // Note: Using if() optimizes nicely in Devel and Release builds, but will generate extra // code overhead in debug builds (since debug neither inlines, nor optimizes out const- // level conditionals). Normally not a concern, but if you stick if( IsDevbuild ) in @@ -146,9 +133,9 @@ # define pxReleaseCode(code) code #endif -////////////////////////////////////////////////////////////////////////////////////////// +// -------------------------------------------------------------------------------------- // __aligned / __aligned16 / __pagealigned -// +// -------------------------------------------------------------------------------------- // GCC Warning! The GCC linker (LD) typically fails to assure alignment of class members. // If you want alignment to be assured, the variable must either be a member of a struct // or a static global. @@ -167,9 +154,9 @@ #define PCSX2_PAGESIZE 0x1000 static const int __pagesize = PCSX2_PAGESIZE; -////////////////////////////////////////////////////////////////////////////////////////// +// -------------------------------------------------------------------------------------- // Structure Packing (__packed) -// +// -------------------------------------------------------------------------------------- // Current Method: // Use a combination of embedded compiler-specific #pragma mess in conjunction with a // __packed macro. 
The former appeases the MSVC gods, the latter appeases the GCC gods. @@ -296,8 +283,28 @@ static const int __pagesize = PCSX2_PAGESIZE; #endif // end GCC-specific section. #ifndef THE_UNBEARABLE_LIGHTNESS_OF_BEING_GCC_4_4_0 -# define __nooptimization +# define __nooptimization // Pretty sure this is obsolete now, since we fixed __asm constraints and stuff. --air #endif +// -------------------------------------------------------------------------------------- +// __releaseinline / __ri -- a forceinline macro that is enabled for RELEASE/PUBLIC builds ONLY. +// -------------------------------------------------------------------------------------- +// This is useful because forceinline can make certain types of debugging problematic since +// functions that look like they should be called won't breakpoint since their code is +// inlined, and it can make stack traces confusing or near useless. +// +// Use __releaseinline for things which are generally large functions where trace debugging +// from Devel builds is likely useful; but which should be inlined in an optimized Release +// environment.
+// +#ifdef PCSX2_DEVBUILD +# define __releaseinline +#else +# define __releaseinline __forceinline +#endif + +#define __ri __releaseinline +#define __fi __forceinline + #endif diff --git a/common/include/Utilities/Dependencies.h b/common/include/Utilities/Dependencies.h index 4106611308..82345dc354 100644 --- a/common/include/Utilities/Dependencies.h +++ b/common/include/Utilities/Dependencies.h @@ -80,18 +80,18 @@ namespace Threading // EnumToString(value); // #define ImplementEnumOperators( enumName ) \ - static __forceinline enumName& operator++ ( enumName& src ) { src = (enumName)((int)src+1); return src; } \ - static __forceinline enumName& operator-- ( enumName& src ) { src = (enumName)((int)src-1); return src; } \ - static __forceinline enumName operator++ ( enumName& src, int ) { enumName orig = src; src = (enumName)((int)src+1); return orig; } \ - static __forceinline enumName operator-- ( enumName& src, int ) { enumName orig = src; src = (enumName)((int)src-1); return orig; } \ + static __fi enumName& operator++ ( enumName& src ) { src = (enumName)((int)src+1); return src; } \ + static __fi enumName& operator-- ( enumName& src ) { src = (enumName)((int)src-1); return src; } \ + static __fi enumName operator++ ( enumName& src, int ) { enumName orig = src; src = (enumName)((int)src+1); return orig; } \ + static __fi enumName operator-- ( enumName& src, int ) { enumName orig = src; src = (enumName)((int)src-1); return orig; } \ \ - static __forceinline bool operator< ( const enumName& left, const pxEnumEnd_t& ) { return (int)left < enumName##_COUNT; } \ - static __forceinline bool operator!=( const enumName& left, const pxEnumEnd_t& ) { return (int)left != enumName##_COUNT; } \ - static __forceinline bool operator==( const enumName& left, const pxEnumEnd_t& ) { return (int)left == enumName##_COUNT; } \ + static __fi bool operator< ( const enumName& left, const pxEnumEnd_t& ) { return (int)left < enumName##_COUNT; } \ + static __fi bool operator!=( const 
enumName& left, const pxEnumEnd_t& ) { return (int)left != enumName##_COUNT; } \ + static __fi bool operator==( const enumName& left, const pxEnumEnd_t& ) { return (int)left == enumName##_COUNT; } \ \ - static __forceinline bool EnumIsValid( enumName id ) { \ + static __fi bool EnumIsValid( enumName id ) { \ return ((int)id >= enumName##_FIRST) && ((int)id < enumName##_COUNT); } \ - static __forceinline bool EnumAssert( enumName id ) { \ + static __fi bool EnumAssert( enumName id ) { \ return pxAssert( EnumIsValid(id) ); } \ \ extern const wxChar* EnumToString( enumName id ) diff --git a/common/include/Utilities/EventSource.inl b/common/include/Utilities/EventSource.inl index 7f3d00be68..7cee20b57a 100644 --- a/common/include/Utilities/EventSource.inl +++ b/common/include/Utilities/EventSource.inl @@ -63,7 +63,7 @@ typename EventSource::ListenerIterator EventSource:: template< typename ListenerType > -__forceinline void EventSource::_DispatchRaw( ListenerIterator iter, const ListenerIterator& iend, const EvtParams& evtparams ) +__fi void EventSource::_DispatchRaw( ListenerIterator iter, const ListenerIterator& iend, const EvtParams& evtparams ) { while( iter != iend ) { diff --git a/common/include/Utilities/lnx_memzero.h b/common/include/Utilities/lnx_memzero.h index 11c3e78093..e787acea26 100644 --- a/common/include/Utilities/lnx_memzero.h +++ b/common/include/Utilities/lnx_memzero.h @@ -20,7 +20,7 @@ // memset16, etc. template< u32 data, typename T > -static __forceinline void memset32( T& obj ) +static __fi void memset32( T& obj ) { // this function works on 32-bit aligned lengths of data only. 
// If the data length is not a factor of 32 bits, the C++ optimizing compiler will @@ -34,19 +34,19 @@ static __forceinline void memset32( T& obj ) } template< uint size > -static __forceinline void memzero_ptr( void* dest ) +static __fi void memzero_ptr( void* dest ) { memset( dest, 0, size ); } template< typename T > -static __forceinline void memzero( T& obj ) +static __fi void memzero( T& obj ) { memset( &obj, 0, sizeof( T ) ); } template< u8 data, typename T > -static __forceinline void memset8( T& obj ) +static __fi void memset8( T& obj ) { // Aligned sizes use the optimized 32 bit inline memset. Unaligned sizes use memset. if( (sizeof(T) & 0x3) != 0 ) @@ -56,7 +56,7 @@ static __forceinline void memset8( T& obj ) } template< u16 data, typename T > -static __forceinline void memset16( T& obj ) +static __fi void memset16( T& obj ) { if( (sizeof(T) & 0x3) != 0 ) _memset16_unaligned( &obj, data, sizeof( T ) ); @@ -67,7 +67,7 @@ static __forceinline void memset16( T& obj ) // An optimized memset for 8 bit destination data. template< u8 data, size_t bytes > -static __forceinline void memset_8( void *dest ) +static __fi void memset_8( void *dest ) { if( bytes == 0 ) return; diff --git a/common/include/Utilities/win_memzero.h b/common/include/Utilities/win_memzero.h index 639fba6b71..a941032a3c 100644 --- a/common/include/Utilities/win_memzero.h +++ b/common/include/Utilities/win_memzero.h @@ -60,7 +60,7 @@ // This is an implementation of the memzero_ptr fast memset routine (for zero-clears only). template< size_t _bytes > -static __forceinline void memzero_ptr( void *dest ) +static __fi void memzero_ptr( void *dest ) { if( MZFbytes == 0 ) return; @@ -247,7 +247,7 @@ static __forceinline void memzero_ptr( void *dest ) // An optimized memset for 8 bit destination data. 
template< u8 data, size_t _bytes > -static __forceinline void memset_8( void *dest ) +static __fi void memset_8( void *dest ) { if( MZFbytes == 0 ) return; @@ -374,7 +374,7 @@ static __forceinline void memset_8( void *dest ) } template< u16 data, size_t _bytes > -static __forceinline void memset_16( void *dest ) +static __fi void memset_16( void *dest ) { if( MZFbytes == 0 ) return; @@ -462,7 +462,7 @@ static __forceinline void memset_16( void *dest ) } template< u32 data, size_t MZFbytes > -static __forceinline void memset_32( void *dest ) +static __fi void memset_32( void *dest ) { if( MZFbytes == 0 ) return; @@ -547,28 +547,28 @@ static __forceinline void memset_32( void *dest ) // Structures, static arrays, etc. No need to include sizeof() crap, this does it automatically // for you! template< typename T > -static __forceinline void memzero( T& object ) +static __fi void memzero( T& object ) { memzero_ptr( &object ); } // This method clears an object with the given 8 bit value. template< u8 data, typename T > -static __forceinline void memset8( T& object ) +static __fi void memset8( T& object ) { memset_8( &object ); } // This method clears an object with the given 16 bit value. template< u16 data, typename T > -static __forceinline void memset16( T& object ) +static __fi void memset16( T& object ) { memset_16( &object ); } // This method clears an object with the given 32 bit value. 
template< u32 data, typename T > -static __forceinline void memset32( T& object ) +static __fi void memset32( T& object ) { memset_32( &object ); } diff --git a/common/include/Utilities/wxGuiTools.h b/common/include/Utilities/wxGuiTools.h index 5d91c60e64..51940ce6b1 100644 --- a/common/include/Utilities/wxGuiTools.h +++ b/common/include/Utilities/wxGuiTools.h @@ -138,12 +138,12 @@ struct pxStretchType } }; -static __forceinline wxSizerFlags pxProportion( int prop ) +static __fi wxSizerFlags pxProportion( int prop ) { return wxSizerFlags( prop ); } -static __forceinline wxSizerFlags pxBorder( int dir=wxALL, int pad=pxSizerFlags::StdPadding ) +static __fi wxSizerFlags pxBorder( int dir=wxALL, int pad=pxSizerFlags::StdPadding ) { return wxSizerFlags().Border( dir, pad ); } diff --git a/common/include/x86emitter/implement/jmpcall.h b/common/include/x86emitter/implement/jmpcall.h index 6b7fa49ec0..9fa094edbc 100644 --- a/common/include/x86emitter/implement/jmpcall.h +++ b/common/include/x86emitter/implement/jmpcall.h @@ -22,7 +22,7 @@ namespace x86Emitter { #ifdef __GNUG__ // GCC has a bug that causes the templated function handler for Jmp/Call emitters to generate // bad asm code. (error is something like "7#*_uber_379s_mangled_$&02_name is already defined!") - // Using GCC's always_inline attribute fixes it. This differs from __forceinline in that it + // Using GCC's always_inline attribute fixes it. This differs from __fi in that it // inlines *even in debug builds* which is (usually) undesirable. // ... except when it avoids compiler bugs. # define __always_inline_tmpl_fail __attribute__((always_inline)) @@ -45,7 +45,7 @@ struct xImpl_JmpCall // Special form for calling functions. This form automatically resolves the // correct displacement based on the size of the instruction being generated. 
- template< typename T > __forceinline __always_inline_tmpl_fail + template< typename T > __fi __always_inline_tmpl_fail void operator()( T* func ) const { if( isJmp ) diff --git a/common/include/x86emitter/inlines.inl b/common/include/x86emitter/inlines.inl index b36bd0fa32..b242bd73d7 100644 --- a/common/include/x86emitter/inlines.inl +++ b/common/include/x86emitter/inlines.inl @@ -37,7 +37,7 @@ // definitions in the .h file because of inter-dependencies with other classes. // (score one for C++!!) // -// In order for MSVC to work correctly with __forceinline on class members, +// In order for MSVC to work correctly with __fi on class members, // however, we need to include these methods into all source files which might // reference them. Without this MSVC generates linker errors. Or, in other words, // global optimization fails to resolve the externals and junk. @@ -50,51 +50,51 @@ namespace x86Emitter // x86Register Method Implementations (inlined!) // -------------------------------------------------------------------------------------- - __forceinline xAddressInfo xAddressReg::operator+( const xAddressReg& right ) const + __fi xAddressInfo xAddressReg::operator+( const xAddressReg& right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); return xAddressInfo( *this, right ); } - __forceinline xAddressInfo xAddressReg::operator+( const xAddressInfo& right ) const + __fi xAddressInfo xAddressReg::operator+( const xAddressInfo& right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); return right + *this; } - __forceinline xAddressInfo xAddressReg::operator+( s32 right ) const + __fi xAddressInfo xAddressReg::operator+( s32 right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); return xAddressInfo( *this, right ); } - __forceinline xAddressInfo xAddressReg::operator+( const void* right ) const + __fi xAddressInfo xAddressReg::operator+( const void* right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." 
); return xAddressInfo( *this, (s32)right ); } // ------------------------------------------------------------------------ - __forceinline xAddressInfo xAddressReg::operator-( s32 right ) const + __fi xAddressInfo xAddressReg::operator-( s32 right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); return xAddressInfo( *this, -right ); } - __forceinline xAddressInfo xAddressReg::operator-( const void* right ) const + __fi xAddressInfo xAddressReg::operator-( const void* right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); return xAddressInfo( *this, -(s32)right ); } // ------------------------------------------------------------------------ - __forceinline xAddressInfo xAddressReg::operator*( u32 right ) const + __fi xAddressInfo xAddressReg::operator*( u32 right ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." ); return xAddressInfo( xEmptyReg, *this, right ); } - __forceinline xAddressInfo xAddressReg::operator<<( u32 shift ) const + __fi xAddressInfo xAddressReg::operator<<( u32 shift ) const { pxAssertMsg( Id != -1, "Uninitialized x86 register." 
); return xAddressInfo( xEmptyReg, *this, 1< __forceinline void xJE( T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } - template< typename T > __forceinline void xJZ( T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNE( T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNZ( T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } + template< typename T > __fi void xJE( T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } + template< typename T > __fi void xJZ( T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } + template< typename T > __fi void xJNE( T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } + template< typename T > __fi void xJNZ( T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJO( T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNO( T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJC( T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNC( T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJS( T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNS( T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } + template< typename T > __fi void xJO( T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } + template< typename T > __fi void xJNO( T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } + template< typename T > __fi void xJC( T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } + template< typename T > __fi void xJNC( T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } + template< typename T > __fi void xJS( T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } + template< typename T > __fi void xJNS( T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } 
- template< typename T > __forceinline void xJPE( T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPO( T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } + template< typename T > __fi void xJPE( T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } + template< typename T > __fi void xJPO( T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } - template< typename T > __forceinline void xJL( T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } - template< typename T > __forceinline void xJLE( T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJG( T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } - template< typename T > __forceinline void xJGE( T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } + template< typename T > __fi void xJL( T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } + template< typename T > __fi void xJLE( T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } + template< typename T > __fi void xJG( T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } + template< typename T > __fi void xJGE( T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJB( T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } - template< typename T > __forceinline void xJBE( T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJA( T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } - template< typename T > __forceinline void xJAE( T* func ) { xJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } + template< typename T > __fi void xJB( T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } + template< typename T > __fi void xJBE( T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } + template< typename T > __fi void xJA( T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } + template< typename T > __fi void xJAE( T* func ) { xJcc( Jcc_AboveOrEqual, 
(void*)(uptr)func ); } // ------------------------------------------------------------------------ // Forward Jump Helpers (act as labels!) diff --git a/common/include/x86emitter/legacy_internal.h b/common/include/x86emitter/legacy_internal.h index 5ebcc27279..5de32eec52 100644 --- a/common/include/x86emitter/legacy_internal.h +++ b/common/include/x86emitter/legacy_internal.h @@ -21,7 +21,7 @@ // Legacy Helper Macros and Functions (depreciated) //------------------------------------------------------------------ -#define emitterT __forceinline +#define emitterT __fi using x86Emitter::xWrite8; using x86Emitter::xWrite16; diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h index 04b791a6f6..b29170371b 100644 --- a/common/include/x86emitter/x86types.h +++ b/common/include/x86emitter/x86types.h @@ -80,7 +80,7 @@ extern const char* xGetRegName( int regid, int operandSize ); //------------------------------------------------------------------ // templated version of is_s8 is required, so that u16's get correct sign extension treatment. template< typename T > -static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } +static __fi bool is_s8( T imm ) { return (s8)imm == (s32)imm; } template< typename T > void xWrite( T val ); @@ -114,14 +114,14 @@ template< typename T > void xWrite( T val ); // // In the case of (Reg, Imm) forms, the inlining is up to the discreation of the compiler. // -// Note: I *intentionally* use __forceinline directly for most single-line class members, +// Note: I *intentionally* use __fi directly for most single-line class members, // when needed. There's no point in using __emitline in these cases since the debugger // can't trace into single-line functions anyway. // #ifdef PCSX2_DEVBUILD # define __emitinline #else -# define __emitinline __forceinline +# define __emitinline __fi #endif // ModRM 'mod' field enumeration. 
Provided mostly for reference: @@ -535,15 +535,15 @@ template< typename T > void xWrite( T val ); xAddressVoid& Add( const xAddressReg& src ); xAddressVoid& Add( const xAddressVoid& src ); - __forceinline xAddressVoid operator+( const xAddressReg& right ) const { return xAddressVoid( *this ).Add( right ); } - __forceinline xAddressVoid operator+( const xAddressVoid& right ) const { return xAddressVoid( *this ).Add( right ); } - __forceinline xAddressVoid operator+( s32 imm ) const { return xAddressVoid( *this ).Add( imm ); } - __forceinline xAddressVoid operator-( s32 imm ) const { return xAddressVoid( *this ).Add( -imm ); } - __forceinline xAddressVoid operator+( const void* addr ) const { return xAddressVoid( *this ).Add( (uptr)addr ); } + __fi xAddressVoid operator+( const xAddressReg& right ) const { return xAddressVoid( *this ).Add( right ); } + __fi xAddressVoid operator+( const xAddressVoid& right ) const { return xAddressVoid( *this ).Add( right ); } + __fi xAddressVoid operator+( s32 imm ) const { return xAddressVoid( *this ).Add( imm ); } + __fi xAddressVoid operator-( s32 imm ) const { return xAddressVoid( *this ).Add( -imm ); } + __fi xAddressVoid operator+( const void* addr ) const { return xAddressVoid( *this ).Add( (uptr)addr ); } - __forceinline void operator+=( const xAddressReg& right ) { Add( right ); } - __forceinline void operator+=( s32 imm ) { Add( imm ); } - __forceinline void operator-=( s32 imm ) { Add( -imm ); } + __fi void operator+=( const xAddressReg& right ) { Add( right ); } + __fi void operator+=( s32 imm ) { Add( imm ); } + __fi void operator-=( s32 imm ) { Add( -imm ); } }; // -------------------------------------------------------------------------------------- @@ -584,13 +584,13 @@ template< typename T > void xWrite( T val ); xAddressInfo& Add( const xAddressReg& src ) { _parent::Add(src); return *this; } xAddressInfo& Add( const xAddressInfo& src ) { _parent::Add(src); return *this; } - __forceinline xAddressInfo operator+( 
const xAddressReg& right ) const { return xAddressInfo( *this ).Add( right ); } - __forceinline xAddressInfo operator+( const xAddressInfo& right ) const { return xAddressInfo( *this ).Add( right ); } - __forceinline xAddressInfo operator+( s32 imm ) const { return xAddressInfo( *this ).Add( imm ); } - __forceinline xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); } - __forceinline xAddressInfo operator+( const void* addr ) const { return xAddressInfo( *this ).Add( (uptr)addr ); } + __fi xAddressInfo operator+( const xAddressReg& right ) const { return xAddressInfo( *this ).Add( right ); } + __fi xAddressInfo operator+( const xAddressInfo& right ) const { return xAddressInfo( *this ).Add( right ); } + __fi xAddressInfo operator+( s32 imm ) const { return xAddressInfo( *this ).Add( imm ); } + __fi xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); } + __fi xAddressInfo operator+( const void* addr ) const { return xAddressInfo( *this ).Add( (uptr)addr ); } - __forceinline void operator+=( const xAddressInfo& right ) { Add( right ); } + __fi void operator+=( const xAddressInfo& right ) { Add( right ); } }; typedef xAddressInfo xAddress128; @@ -599,25 +599,25 @@ template< typename T > void xWrite( T val ); typedef xAddressInfo xAddress16; typedef xAddressInfo xAddress8; - static __forceinline xAddressVoid operator+( const void* addr, const xAddressVoid& right ) + static __fi xAddressVoid operator+( const void* addr, const xAddressVoid& right ) { return right + addr; } - static __forceinline xAddressVoid operator+( s32 addr, const xAddressVoid& right ) + static __fi xAddressVoid operator+( s32 addr, const xAddressVoid& right ) { return right + addr; } template< typename OperandType > - static __forceinline xAddressInfo operator+( const void* addr, const xAddressInfo& right ) + static __fi xAddressInfo operator+( const void* addr, const xAddressInfo& right ) { //return xAddressInfo( (sptr)addr ).Add( reg 
); return right + addr; } template< typename OperandType > - static __forceinline xAddressInfo operator+( s32 addr, const xAddressInfo& right ) + static __fi xAddressInfo operator+( s32 addr, const xAddressInfo& right ) { return right + addr; } @@ -691,8 +691,8 @@ template< typename T > void xWrite( T val ); return xAddressVoid( Base, Index, Scale, Displacement ); } - __forceinline xIndirectVoid operator+( const s32 imm ) const { return xIndirectVoid( *this ).Add( imm ); } - __forceinline xIndirectVoid operator-( const s32 imm ) const { return xIndirectVoid( *this ).Add( -imm ); } + __fi xIndirectVoid operator+( const s32 imm ) const { return xIndirectVoid( *this ).Add( imm ); } + __fi xIndirectVoid operator-( const s32 imm ) const { return xIndirectVoid( *this ).Add( -imm ); } protected: void Reduce(); @@ -717,8 +717,8 @@ template< typename T > void xWrite( T val ); return *this; } - __forceinline xIndirect operator+( const s32 imm ) const { return xIndirect( *this ).Add( imm ); } - __forceinline xIndirect operator-( const s32 imm ) const { return xIndirect( *this ).Add( -imm ); } + __fi xIndirect operator+( const s32 imm ) const { return xIndirect( *this ).Add( imm ); } + __fi xIndirect operator-( const s32 imm ) const { return xIndirect( *this ).Add( -imm ); } bool operator==( const xIndirect& src ) const { @@ -963,12 +963,12 @@ template< typename T > void xWrite( T val ); } }; - static __forceinline xAddressVoid operator+( const void* addr, const xAddressReg& reg ) + static __fi xAddressVoid operator+( const void* addr, const xAddressReg& reg ) { return reg + (sptr)addr; } - static __forceinline xAddressVoid operator+( s32 addr, const xAddressReg& reg ) + static __fi xAddressVoid operator+( s32 addr, const xAddressReg& reg ) { return reg + (sptr)addr; } diff --git a/common/src/Utilities/AlignedMalloc.cpp b/common/src/Utilities/AlignedMalloc.cpp index fc2055a78f..4f8264018a 100644 --- a/common/src/Utilities/AlignedMalloc.cpp +++ 
b/common/src/Utilities/AlignedMalloc.cpp @@ -60,7 +60,7 @@ void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align) return newbuf; } -__forceinline void pcsx2_aligned_free(void* pmem) +__fi void pcsx2_aligned_free(void* pmem) { if( pmem == NULL ) return; AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)pmem - headsize); @@ -73,7 +73,7 @@ __forceinline void pcsx2_aligned_free(void* pmem) // Special unaligned memset used when all other optimized memsets fail (it's called from // memzero_obj and stuff). -__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size ) +__fi void _memset16_unaligned( void* dest, u16 data, size_t size ) { pxAssume( (size & 0x1) == 0 ); @@ -82,7 +82,7 @@ __forceinline void _memset16_unaligned( void* dest, u16 data, size_t size ) *dst = data; } -__forceinline void HostSys::Munmap( void* base, u32 size ) +__fi void HostSys::Munmap( void* base, u32 size ) { Munmap( (uptr)base, size ); } diff --git a/common/src/Utilities/Console.cpp b/common/src/Utilities/Console.cpp index c95a0c4e71..e42bf8ef29 100644 --- a/common/src/Utilities/Console.cpp +++ b/common/src/Utilities/Console.cpp @@ -108,7 +108,7 @@ const IConsoleWriter ConsoleWriter_Null = // -------------------------------------------------------------------------------------- #ifdef __LINUX__ -static __forceinline const wxChar* GetLinuxConsoleColor(ConsoleColors color) +static __fi const wxChar* GetLinuxConsoleColor(ConsoleColors color) { switch(color) { diff --git a/common/src/Utilities/Exceptions.cpp b/common/src/Utilities/Exceptions.cpp index 4e5fa91e99..5c33bb3c7a 100644 --- a/common/src/Utilities/Exceptions.cpp +++ b/common/src/Utilities/Exceptions.cpp @@ -36,7 +36,7 @@ static wxString GetTranslation( const wxChar* msg ) #ifdef PCSX2_DEVBUILD # define DEVASSERT_INLINE __noinline #else -# define DEVASSERT_INLINE __forceinline +# define DEVASSERT_INLINE __fi #endif // Using a threadlocal assertion guard. 
Separate threads can assert at the same time. @@ -123,7 +123,7 @@ DEVASSERT_INLINE void pxOnAssert( const DiagnosticOrigin& origin, const wxChar* if( trapit ) { pxTrap(); } } -__forceinline void pxOnAssert( const DiagnosticOrigin& origin, const char* msg) +__fi void pxOnAssert( const DiagnosticOrigin& origin, const char* msg) { pxOnAssert( origin, fromUTF8(msg) ); } diff --git a/common/src/Utilities/FastFormatString.cpp b/common/src/Utilities/FastFormatString.cpp index 9b130d31a0..774737af4a 100644 --- a/common/src/Utilities/FastFormatString.cpp +++ b/common/src/Utilities/FastFormatString.cpp @@ -141,7 +141,7 @@ public: static bool buffer_is_avail = false; static GlobalBufferManager< BaseTlsVariable< FastFormatBuffers< char > > > m_buffer_tls(buffer_is_avail); -static __releaseinline void format_that_ascii_mess( SafeArray& buffer, uint writepos, const char* fmt, va_list argptr ) +static __ri void format_that_ascii_mess( SafeArray& buffer, uint writepos, const char* fmt, va_list argptr ) { while( true ) { @@ -171,7 +171,7 @@ static __releaseinline void format_that_ascii_mess( SafeArray& buffer, uin // though it'd be kinda nice if we did. } -static __releaseinline void format_that_unicode_mess( SafeArray& buffer, uint writepos, const wxChar* fmt, va_list argptr) +static __ri void format_that_unicode_mess( SafeArray& buffer, uint writepos, const wxChar* fmt, va_list argptr) { while( true ) { diff --git a/common/src/Utilities/StringHelpers.cpp b/common/src/Utilities/StringHelpers.cpp index ff0b7aeac6..f17026be04 100644 --- a/common/src/Utilities/StringHelpers.cpp +++ b/common/src/Utilities/StringHelpers.cpp @@ -16,7 +16,7 @@ #include "PrecompiledHeader.h" #include // for wxPoint/wxRect stuff -__forceinline wxString fromUTF8( const char* src ) +__fi wxString fromUTF8( const char* src ) { // IMPORTANT: We cannot use wxString::FromUTF8 because it *stupidly* relies on a C++ global instance of // wxMBConvUTF8(). 
C++ initializes and destroys these globals at random, so any object constructor or @@ -30,7 +30,7 @@ __forceinline wxString fromUTF8( const char* src ) return wxString( src, wxMBConvUTF8() ); } -__forceinline wxString fromAscii( const char* src ) +__fi wxString fromAscii( const char* src ) { return wxString::FromAscii( src ); } diff --git a/common/src/Utilities/ThreadTools.cpp b/common/src/Utilities/ThreadTools.cpp index ec6d515bc3..4b66bbd536 100644 --- a/common/src/Utilities/ThreadTools.cpp +++ b/common/src/Utilities/ThreadTools.cpp @@ -145,7 +145,7 @@ bool Threading::_WaitGui_RecursionGuard( const wxChar* name ) return true; } -__forceinline void Threading::Timeslice() +__fi void Threading::Timeslice() { sched_yield(); } @@ -774,57 +774,57 @@ void Threading::WaitEvent::Wait() // -------------------------------------------------------------------------------------- // define some overloads for InterlockedExchanges for commonly used types, like u32 and s32. -__forceinline bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit ) +__fi bool Threading::AtomicBitTestAndReset( volatile u32& bitset, u8 bit ) { return _interlockedbittestandreset( (volatile long*)& bitset, bit ) != 0; } -__forceinline u32 Threading::AtomicExchange( volatile u32& Target, u32 value ) +__fi u32 Threading::AtomicExchange( volatile u32& Target, u32 value ) { return _InterlockedExchange( (volatile long*)&Target, value ); } -__forceinline u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value ) +__fi u32 Threading::AtomicExchangeAdd( volatile u32& Target, u32 value ) { return _InterlockedExchangeAdd( (volatile long*)&Target, value ); } -__forceinline u32 Threading::AtomicIncrement( volatile u32& Target ) +__fi u32 Threading::AtomicIncrement( volatile u32& Target ) { return _InterlockedExchangeAdd( (volatile long*)&Target, 1 ); } -__forceinline u32 Threading::AtomicDecrement( volatile u32& Target ) +__fi u32 Threading::AtomicDecrement( volatile u32& Target ) { return 
_InterlockedExchangeAdd( (volatile long*)&Target, -1 ); } -__forceinline s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) +__fi s32 Threading::AtomicExchange( volatile s32& Target, s32 value ) { return _InterlockedExchange( (volatile long*)&Target, value ); } -__forceinline s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value ) +__fi s32 Threading::AtomicExchangeAdd( volatile s32& Target, s32 value ) { return _InterlockedExchangeAdd( (volatile long*)&Target, value ); } -__forceinline s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value ) +__fi s32 Threading::AtomicExchangeSub( volatile s32& Target, s32 value ) { return _InterlockedExchangeAdd( (volatile long*)&Target, -value ); } -__forceinline s32 Threading::AtomicIncrement( volatile s32& Target ) +__fi s32 Threading::AtomicIncrement( volatile s32& Target ) { return _InterlockedExchangeAdd( (volatile long*)&Target, 1 ); } -__forceinline s32 Threading::AtomicDecrement( volatile s32& Target ) +__fi s32 Threading::AtomicDecrement( volatile s32& Target ) { return _InterlockedExchangeAdd( (volatile long*)&Target, -1 ); } -__forceinline void* Threading::_AtomicExchangePointer( volatile uptr& target, uptr value ) +__fi void* Threading::_AtomicExchangePointer( volatile uptr& target, uptr value ) { #ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place. return (void*)_InterlockedExchange64( &(volatile s64&)target, value ); @@ -833,7 +833,7 @@ __forceinline void* Threading::_AtomicExchangePointer( volatile uptr& target, up #endif } -__forceinline void* Threading::_AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand ) +__fi void* Threading::_AtomicCompareExchangePointer( volatile uptr& target, uptr value, uptr comparand ) { #ifdef _M_AMD64 // high-level atomic ops, please leave these 64 bit checks in place. 
return (void*)_InterlockedCompareExchange64( &(volatile s64&)target, value ); diff --git a/common/src/Utilities/Windows/WinThreads.cpp b/common/src/Utilities/Windows/WinThreads.cpp index cb9b7e7eda..8c6930a90b 100644 --- a/common/src/Utilities/Windows/WinThreads.cpp +++ b/common/src/Utilities/Windows/WinThreads.cpp @@ -24,24 +24,24 @@ #else -__forceinline void Threading::Sleep( int ms ) +__fi void Threading::Sleep( int ms ) { ::Sleep( ms ); } // For use in spin/wait loops, Acts as a hint to Intel CPUs and should, in theory // improve performance and reduce cpu power consumption. -__forceinline void Threading::SpinWait() +__fi void Threading::SpinWait() { __asm pause; } -__forceinline void Threading::StoreFence() +__fi void Threading::StoreFence() { __asm sfence; } -__forceinline void Threading::EnableHiresScheduler() +__fi void Threading::EnableHiresScheduler() { // This improves accuracy of Sleep() by some amount, and only adds a negligible amount of // overhead on modern CPUs. Typically desktops are already set pretty low, but laptops in @@ -52,7 +52,7 @@ __forceinline void Threading::EnableHiresScheduler() timeBeginPeriod( 1 ); } -__forceinline void Threading::DisableHiresScheduler() +__fi void Threading::DisableHiresScheduler() { timeEndPeriod( 1 ); } diff --git a/common/src/Utilities/x86/MemcpyFast.cpp b/common/src/Utilities/x86/MemcpyFast.cpp index c18b65658c..be026758c8 100644 --- a/common/src/Utilities/x86/MemcpyFast.cpp +++ b/common/src/Utilities/x86/MemcpyFast.cpp @@ -234,7 +234,7 @@ $memcpy_final: } // Quadword Copy! Count is in QWCs (128 bits). Neither source nor dest need to be aligned. -__forceinline void memcpy_amd_qwc(void *dest, const void *src, size_t qwc) +__fi void memcpy_amd_qwc(void *dest, const void *src, size_t qwc) { // Optimization Analysis: This code is *nearly* optimal. Do not think that using XMM // registers will improve copy performance, because they won't. 
Use of XMMs is only diff --git a/common/src/Utilities/x86/MemcpyVibes.cpp b/common/src/Utilities/x86/MemcpyVibes.cpp index 2ee7b5e486..33d2664f09 100644 --- a/common/src/Utilities/x86/MemcpyVibes.cpp +++ b/common/src/Utilities/x86/MemcpyVibes.cpp @@ -29,7 +29,7 @@ __aligned16 _memCpyCall _memcpy_vibes[_maxSize+1]; // this version uses SSE intrinsics to perform an inline copy. MSVC disasm shows pretty // decent code generation on whole, but it hasn't been benchmarked at all yet --air -__forceinline void memcpy_vibes(void * dest, const void * src, int size) { +__fi void memcpy_vibes(void * dest, const void * src, int size) { float (*destxmm)[4] = (float(*)[4])dest, (*srcxmm)[4] = (float(*)[4])src; size_t count = size & ~15, extra = size & 15; @@ -110,7 +110,7 @@ void gen_memcpy_vibes() { HostSys::MemProtectStatic(_memCpyExec, Protect_ReadOnly, true); } -__forceinline void memcpy_vibes(void * dest, const void * src, int size) { +__fi void memcpy_vibes(void * dest, const void * src, int size) { int offset = ((size & 0xf) - 7) << 4; _memcpy_vibes[size]((void*)((uptr)dest + offset), (void*)((uptr)src + offset)); } @@ -150,7 +150,7 @@ void gen_memcpy_vibes() { HostSys::MemProtectStatic(_memCpyExec, Protect_ReadOnly, true); } -__forceinline void memcpy_vibes(void * dest, const void * src, int size) { +__fi void memcpy_vibes(void * dest, const void * src, int size) { _memcpy_vibes[size](dest, src); } @@ -163,7 +163,7 @@ __forceinline void memcpy_vibes(void * dest, const void * src, int size) { // This can be moved later, but Linux doesn't even compile memcpyFast.cpp, so I figured I'd stick it here for now. // Quadword Copy! Count is in QWCs (128 bits). Neither source nor dest need to be aligned. - __forceinline void memcpy_amd_qwc(void *dest, const void *src, size_t qwc) + __fi void memcpy_amd_qwc(void *dest, const void *src, size_t qwc) { // Optimization Analysis: This code is *nearly* optimal. 
Do not think that using XMM // registers will improve copy performance, because they won't. Use of XMMs is only diff --git a/common/src/x86emitter/jmp.cpp b/common/src/x86emitter/jmp.cpp index 8ad8cb9523..f5a65e1a07 100644 --- a/common/src/x86emitter/jmp.cpp +++ b/common/src/x86emitter/jmp.cpp @@ -180,7 +180,7 @@ void xForwardJumpBase::_setTarget( uint opsize ) const } // returns the inverted conditional type for this Jcc condition. Ie, JNS will become JS. -__forceinline JccComparisonType xInvertCond( JccComparisonType src ) +__fi JccComparisonType xInvertCond( JccComparisonType src ) { pxAssert( src != Jcc_Unknown ); if( Jcc_Unconditional == src ) return Jcc_Unconditional; diff --git a/common/src/x86emitter/simd.cpp b/common/src/x86emitter/simd.cpp index 374ecdb4dd..6e15059fcf 100644 --- a/common/src/x86emitter/simd.cpp +++ b/common/src/x86emitter/simd.cpp @@ -134,57 +134,57 @@ const xImplSimd_DestRegSSE xPTEST = { 0x66,0x1738 }; // nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). 
// -__forceinline void xCVTDQ2PD( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0xe6 ); } -__forceinline void xCVTDQ2PD( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0xf3, 0xe6 ); } -__forceinline void xCVTDQ2PS( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x5b ); } -__forceinline void xCVTDQ2PS( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x00, 0x5b ); } +__fi void xCVTDQ2PD( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0xe6 ); } +__fi void xCVTDQ2PD( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0xf3, 0xe6 ); } +__fi void xCVTDQ2PS( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x5b ); } +__fi void xCVTDQ2PS( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x00, 0x5b ); } -__forceinline void xCVTPD2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0xe6 ); } -__forceinline void xCVTPD2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0xf2, 0xe6 ); } -__forceinline void xCVTPD2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x2d ); } -__forceinline void xCVTPD2PI( const xRegisterMMX& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x2d ); } -__forceinline void xCVTPD2PS( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x5a ); } -__forceinline void xCVTPD2PS( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x5a ); } +__fi void xCVTPD2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0xe6 ); } +__fi void xCVTPD2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0xf2, 0xe6 ); } +__fi void xCVTPD2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x2d ); } +__fi void xCVTPD2PI( const xRegisterMMX& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x2d ); } +__fi void xCVTPD2PS( const xRegisterSSE& to, const 
xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x5a ); } +__fi void xCVTPD2PS( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x5a ); } -__forceinline void xCVTPI2PD( const xRegisterSSE& to, const xRegisterMMX& from ) { OpWriteSSE( 0x66, 0x2a ); } -__forceinline void xCVTPI2PD( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0x66, 0x2a ); } -__forceinline void xCVTPI2PS( const xRegisterSSE& to, const xRegisterMMX& from ) { OpWriteSSE( 0x00, 0x2a ); } -__forceinline void xCVTPI2PS( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x2a ); } +__fi void xCVTPI2PD( const xRegisterSSE& to, const xRegisterMMX& from ) { OpWriteSSE( 0x66, 0x2a ); } +__fi void xCVTPI2PD( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0x66, 0x2a ); } +__fi void xCVTPI2PS( const xRegisterSSE& to, const xRegisterMMX& from ) { OpWriteSSE( 0x00, 0x2a ); } +__fi void xCVTPI2PS( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x2a ); } -__forceinline void xCVTPS2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x5b ); } -__forceinline void xCVTPS2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x5b ); } -__forceinline void xCVTPS2PD( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x5a ); } -__forceinline void xCVTPS2PD( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x5a ); } -__forceinline void xCVTPS2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x2d ); } -__forceinline void xCVTPS2PI( const xRegisterMMX& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x2d ); } +__fi void xCVTPS2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x5b ); } +__fi void xCVTPS2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x5b ); } +__fi void xCVTPS2PD( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x5a ); } +__fi void 
xCVTPS2PD( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x5a ); } +__fi void xCVTPS2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x2d ); } +__fi void xCVTPS2PI( const xRegisterMMX& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x2d ); } -__forceinline void xCVTSD2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0x2d ); } -__forceinline void xCVTSD2SI( const xRegister32& to, const xIndirect64& from ) { OpWriteSSE( 0xf2, 0x2d ); } -__forceinline void xCVTSD2SS( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0x5a ); } -__forceinline void xCVTSD2SS( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0xf2, 0x5a ); } -__forceinline void xCVTSI2SD( const xRegisterMMX& to, const xRegister32& from ) { OpWriteSSE( 0xf2, 0x2a ); } -__forceinline void xCVTSI2SD( const xRegisterMMX& to, const xIndirect32& from ) { OpWriteSSE( 0xf2, 0x2a ); } -__forceinline void xCVTSI2SS( const xRegisterSSE& to, const xRegister32& from ) { OpWriteSSE( 0xf3, 0x2a ); } -__forceinline void xCVTSI2SS( const xRegisterSSE& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x2a ); } +__fi void xCVTSD2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0x2d ); } +__fi void xCVTSD2SI( const xRegister32& to, const xIndirect64& from ) { OpWriteSSE( 0xf2, 0x2d ); } +__fi void xCVTSD2SS( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0x5a ); } +__fi void xCVTSD2SS( const xRegisterSSE& to, const xIndirect64& from ) { OpWriteSSE( 0xf2, 0x5a ); } +__fi void xCVTSI2SD( const xRegisterMMX& to, const xRegister32& from ) { OpWriteSSE( 0xf2, 0x2a ); } +__fi void xCVTSI2SD( const xRegisterMMX& to, const xIndirect32& from ) { OpWriteSSE( 0xf2, 0x2a ); } +__fi void xCVTSI2SS( const xRegisterSSE& to, const xRegister32& from ) { OpWriteSSE( 0xf3, 0x2a ); } +__fi void xCVTSI2SS( const xRegisterSSE& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x2a ); } 
-__forceinline void xCVTSS2SD( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x5a ); } -__forceinline void xCVTSS2SD( const xRegisterSSE& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x5a ); } -__forceinline void xCVTSS2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x2d ); } -__forceinline void xCVTSS2SI( const xRegister32& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x2d ); } +__fi void xCVTSS2SD( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x5a ); } +__fi void xCVTSS2SD( const xRegisterSSE& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x5a ); } +__fi void xCVTSS2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x2d ); } +__fi void xCVTSS2SI( const xRegister32& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x2d ); } -__forceinline void xCVTTPD2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0xe6 ); } -__forceinline void xCVTTPD2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0xe6 ); } -__forceinline void xCVTTPD2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x2c ); } -__forceinline void xCVTTPD2PI( const xRegisterMMX& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x2c ); } -__forceinline void xCVTTPS2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x5b ); } -__forceinline void xCVTTPS2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0xf3, 0x5b ); } -__forceinline void xCVTTPS2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x2c ); } -__forceinline void xCVTTPS2PI( const xRegisterMMX& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x2c ); } +__fi void xCVTTPD2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0xe6 ); } +__fi void xCVTTPD2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0xe6 ); } +__fi void xCVTTPD2PI( const 
xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x66, 0x2c ); } +__fi void xCVTTPD2PI( const xRegisterMMX& to, const xIndirect128& from ) { OpWriteSSE( 0x66, 0x2c ); } +__fi void xCVTTPS2DQ( const xRegisterSSE& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x5b ); } +__fi void xCVTTPS2DQ( const xRegisterSSE& to, const xIndirect128& from ) { OpWriteSSE( 0xf3, 0x5b ); } +__fi void xCVTTPS2PI( const xRegisterMMX& to, const xRegisterSSE& from ) { OpWriteSSE( 0x00, 0x2c ); } +__fi void xCVTTPS2PI( const xRegisterMMX& to, const xIndirect64& from ) { OpWriteSSE( 0x00, 0x2c ); } -__forceinline void xCVTTSD2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0x2c ); } -__forceinline void xCVTTSD2SI( const xRegister32& to, const xIndirect64& from ) { OpWriteSSE( 0xf2, 0x2c ); } -__forceinline void xCVTTSS2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x2c ); } -__forceinline void xCVTTSS2SI( const xRegister32& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x2c ); } +__fi void xCVTTSD2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf2, 0x2c ); } +__fi void xCVTTSD2SI( const xRegister32& to, const xIndirect64& from ) { OpWriteSSE( 0xf2, 0x2c ); } +__fi void xCVTTSS2SI( const xRegister32& to, const xRegisterSSE& from ) { OpWriteSSE( 0xf3, 0x2c ); } +__fi void xCVTTSS2SI( const xRegister32& to, const xIndirect32& from ) { OpWriteSSE( 0xf3, 0x2c ); } // ------------------------------------------------------------------------ @@ -452,7 +452,7 @@ const xImplSimd_PMinMax xPMAX = // SIMD Shuffle/Pack (Shuffle puck?) 
// ===================================================================================================== -__forceinline void xImplSimd_Shuffle::_selector_assertion_check( u8 selector ) const +__fi void xImplSimd_Shuffle::_selector_assertion_check( u8 selector ) const { pxAssertMsg( (selector & ~3) == 0, "Invalid immediate operand on SSE Shuffle: Upper 6 bits of the SSE Shuffle-PD Selector are reserved and must be zero." @@ -684,43 +684,43 @@ const xImplSimd_DestRegSSE xMOVSHDUP = { 0xf3,0x16 }; // * MOVD has valid forms for MMX and XMM registers. // -__forceinline void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ) { xOpWrite0F( 0x66, 0x6e, to, from ); } -__forceinline void xMOVDZX( const xRegisterSSE& to, const xIndirectVoid& src ) { xOpWrite0F( 0x66, 0x6e, to, src ); } +__fi void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ) { xOpWrite0F( 0x66, 0x6e, to, from ); } +__fi void xMOVDZX( const xRegisterSSE& to, const xIndirectVoid& src ) { xOpWrite0F( 0x66, 0x6e, to, src ); } -__forceinline void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ) { xOpWrite0F( 0x6e, to, from ); } -__forceinline void xMOVDZX( const xRegisterMMX& to, const xIndirectVoid& src ) { xOpWrite0F( 0x6e, to, src ); } +__fi void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ) { xOpWrite0F( 0x6e, to, from ); } +__fi void xMOVDZX( const xRegisterMMX& to, const xIndirectVoid& src ) { xOpWrite0F( 0x6e, to, src ); } -__forceinline void xMOVD( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, to ); } -__forceinline void xMOVD( const xIndirectVoid& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); } +__fi void xMOVD( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, to ); } +__fi void xMOVD( const xIndirectVoid& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); } -__forceinline void xMOVD( const xRegister32& to, const xRegisterMMX& from ) { 
xOpWrite0F( 0x7e, from, to ); } -__forceinline void xMOVD( const xIndirectVoid& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); } +__fi void xMOVD( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, to ); } +__fi void xMOVD( const xIndirectVoid& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0xf3, 0x7e, to, from ); } +__fi void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0xf3, 0x7e, to, from ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__forceinline void xMOVQZX( const xRegisterSSE& to, const xIndirectVoid& src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } +__fi void xMOVQZX( const xRegisterSSE& to, const xIndirectVoid& src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
-__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } +__fi void xMOVQZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } // Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void xMOVQ( const xIndirectVoid& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); } +__fi void xMOVQ( const xIndirectVoid& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); } -__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) xOpWrite0F( 0x6f, to, from ); } -__forceinline void xMOVQ( const xRegisterMMX& to, const xIndirectVoid& src ) { xOpWrite0F( 0x6f, to, src ); } -__forceinline void xMOVQ( const xIndirectVoid& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); } +__fi void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) xOpWrite0F( 0x6f, to, from ); } +__fi void xMOVQ( const xRegisterMMX& to, const xIndirectVoid& src ) { xOpWrite0F( 0x6f, to, src ); } +__fi void xMOVQ( const xIndirectVoid& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); } // This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ' -__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf3, 0xd6, to, from ); } +__fi void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf3, 0xd6, to, from ); } // This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q' -__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) +__fi void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) { // Manual implementation of this form of MOVQ, since its parameters are unique in a way // that breaks the template inference of writeXMMop(); @@ -733,9 +733,9 @@ __forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) // #define IMPLEMENT_xMOVS( ssd, prefix ) \ - __forceinline void xMOV##ssd( 
const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) xOpWrite0F( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const xIndirectVoid& from ) { xOpWrite0F( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } + __fi void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) xOpWrite0F( prefix, 0x10, to, from ); } \ + __fi void xMOV##ssd##ZX( const xRegisterSSE& to, const xIndirectVoid& from ) { xOpWrite0F( prefix, 0x10, to, from ); } \ + __fi void xMOV##ssd( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } IMPLEMENT_xMOVS( SS, 0xf3 ) IMPLEMENT_xMOVS( SD, 0xf2 ) @@ -744,31 +744,31 @@ IMPLEMENT_xMOVS( SD, 0xf2 ) // Non-temporal movs only support a register as a target (ie, load form only, no stores) // -__forceinline void xMOVNTDQA( const xRegisterSSE& to, const xIndirectVoid& from ) +__fi void xMOVNTDQA( const xRegisterSSE& to, const xIndirectVoid& from ) { xWrite32( 0x2A380f66 ); EmitSibMagic( to.Id, from ); } -__forceinline void xMOVNTDQA( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); } +__fi void xMOVNTDQA( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); } -__forceinline void xMOVNTPD( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); } -__forceinline void xMOVNTPS( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); } +__fi void xMOVNTPD( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); } +__fi void xMOVNTPS( const xIndirectVoid& to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); } -__forceinline void xMOVNTQ( const xIndirectVoid& to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); } +__fi void xMOVNTQ( const 
xIndirectVoid& to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); } // ------------------------------------------------------------------------ -__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x50, to, from ); } -__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x66, 0x50, to, from, true ); } +__fi void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x50, to, from ); } +__fi void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x66, 0x50, to, from, true ); } // xMASKMOV: // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. // The default memory location is specified by DS:EDI. The most significant bit in each byte // of the mask operand determines whether the corresponding byte in the source operand is // written to the corresponding byte location in memory. -__forceinline void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xf7, to, from ); } -__forceinline void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf7, to, from ); } +__fi void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xf7, to, from ); } +__fi void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf7, to, from ); } // xPMOVMSKB: // Creates a mask made up of the most significant bit of each byte of the source @@ -778,15 +778,15 @@ __forceinline void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) // When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on // 128-bit (SSE) source, the byte mask is 16-bits. 
// -__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd7, to, from ); } -__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0xd7, to, from ); } +__fi void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd7, to, from ); } +__fi void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0xd7, to, from ); } // [sSSE-3] Concatenates dest and source operands into an intermediate composite, // shifts the composite at byte granularity to the right by a constant immediate, // and extracts the right-aligned result into the destination. // -__forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x0f3a, to, from, imm8 ); } -__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); } +__fi void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x0f3a, to, from, imm8 ); } +__fi void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); } // -------------------------------------------------------------------------------------- @@ -826,14 +826,14 @@ __emitinline void xEXTRACTPS( const xIndirect32& dest, const xRegisterSSE& from, // Converts from MMX register mode to FPU register mode. The cpu enters MMX register mode // when ever MMX instructions are run, and if FPU instructions are run without using EMMS, // the FPU results will be invalid. -__forceinline void xEMMS() { xWrite16( 0x770F ); } +__fi void xEMMS() { xWrite16( 0x770F ); } // [3DNow] Same as EMMS, but an AMD special version which may (or may not) leave MMX regs // in an undefined state (which is fine, since presumably you're done using them anyway). 
// This instruction is thus faster than EMMS on K8s, but all newer AMD cpus use the same // logic for either EMMS or FEMMS. // Conclusion: Obsolete. Just use EMMS instead. -__forceinline void xFEMMS() { xWrite16( 0x0E0F ); } +__fi void xFEMMS() { xWrite16( 0x0E0F ); } // Store Streaming SIMD Extension Control/Status to Mem32. diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp index 610a2a0347..6e0d2b6c1d 100644 --- a/common/src/x86emitter/x86emitter.cpp +++ b/common/src/x86emitter/x86emitter.cpp @@ -72,22 +72,22 @@ template void xWrite( u32 val ); template void xWrite( u64 val ); template void xWrite( u128 val ); -__forceinline void xWrite8( u8 val ) +__fi void xWrite8( u8 val ) { xWrite( val ); } -__forceinline void xWrite16( u16 val ) +__fi void xWrite16( u16 val ) { xWrite( val ); } -__forceinline void xWrite32( u32 val ) +__fi void xWrite32( u32 val ) { xWrite( val ); } -__forceinline void xWrite64( u64 val ) +__fi void xWrite64( u64 val ) { xWrite( val ); } @@ -213,12 +213,12 @@ const char* xRegisterBase::GetName() // (btw, I know this isn't a critical performance item by any means, but it's // annoying simply because it *should* be an easy thing to optimize) -static __forceinline void ModRM( uint mod, uint reg, uint rm ) +static __fi void ModRM( uint mod, uint reg, uint rm ) { xWrite8( (mod << 6) | (reg << 3) | rm ); } -static __forceinline void SibSB( u32 ss, u32 index, u32 base ) +static __fi void SibSB( u32 ss, u32 index, u32 base ) { xWrite8( (ss << 6) | (index << 3) | base ); } @@ -260,7 +260,7 @@ __emitinline void xOpWrite0F( u16 opcode, int instId, const xIndirectVoid& sib ) ////////////////////////////////////////////////////////////////////////////////////////// // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the // instruction ca be encoded as ModRm alone. 
-static __forceinline bool NeedsSibMagic( const xIndirectVoid& info ) +static __fi bool NeedsSibMagic( const xIndirectVoid& info ) { // no registers? no sibs! // (xIndirectVoid::Reduce always places a register in Index, and optionally leaves @@ -952,37 +952,37 @@ __emitinline void xPUSH( const xIndirectVoid& from ) EmitSibMagic( 6, from ); } -__forceinline void xPOP( xRegister32 from ) { xWrite8( 0x58 | from.Id ); } +__fi void xPOP( xRegister32 from ) { xWrite8( 0x58 | from.Id ); } -__forceinline void xPUSH( u32 imm ) { xWrite8( 0x68 ); xWrite32( imm ); } -__forceinline void xPUSH( xRegister32 from ) { xWrite8( 0x50 | from.Id ); } +__fi void xPUSH( u32 imm ) { xWrite8( 0x68 ); xWrite32( imm ); } +__fi void xPUSH( xRegister32 from ) { xWrite8( 0x50 | from.Id ); } // pushes the EFLAGS register onto the stack -__forceinline void xPUSHFD() { xWrite8( 0x9C ); } +__fi void xPUSHFD() { xWrite8( 0x9C ); } // pops the EFLAGS register from the stack -__forceinline void xPOPFD() { xWrite8( 0x9D ); } +__fi void xPOPFD() { xWrite8( 0x9D ); } ////////////////////////////////////////////////////////////////////////////////////////// // -__forceinline void xLEAVE() { xWrite8( 0xC9 ); } -__forceinline void xRET() { xWrite8( 0xC3 ); } -__forceinline void xCBW() { xWrite16( 0x9866 ); } -__forceinline void xCWD() { xWrite8( 0x98 ); } -__forceinline void xCDQ() { xWrite8( 0x99 ); } -__forceinline void xCWDE() { xWrite8( 0x98 ); } +__fi void xLEAVE() { xWrite8( 0xC9 ); } +__fi void xRET() { xWrite8( 0xC3 ); } +__fi void xCBW() { xWrite16( 0x9866 ); } +__fi void xCWD() { xWrite8( 0x98 ); } +__fi void xCDQ() { xWrite8( 0x99 ); } +__fi void xCWDE() { xWrite8( 0x98 ); } -__forceinline void xLAHF() { xWrite8( 0x9f ); } -__forceinline void xSAHF() { xWrite8( 0x9e ); } +__fi void xLAHF() { xWrite8( 0x9f ); } +__fi void xSAHF() { xWrite8( 0x9e ); } -__forceinline void xSTC() { xWrite8( 0xF9 ); } -__forceinline void xCLC() { xWrite8( 0xF8 ); } +__fi void xSTC() { xWrite8( 0xF9 ); } +__fi void 
xCLC() { xWrite8( 0xF8 ); } // NOP 1-byte -__forceinline void xNOP() { xWrite8(0x90); } +__fi void xNOP() { xWrite8(0x90); } -__forceinline void xINT( u8 imm ) +__fi void xINT( u8 imm ) { if (imm == 3) xWrite8(0xcc); @@ -993,7 +993,7 @@ __forceinline void xINT( u8 imm ) } } -__forceinline void xINTO() { xWrite8(0xce); } +__fi void xINTO() { xWrite8(0xce); } __emitinline void xBSWAP( const xRegister32& to ) { diff --git a/pcsx2/CDVD/CDVD.cpp b/pcsx2/CDVD/CDVD.cpp index 046741b07e..1f6cf7e9a9 100644 --- a/pcsx2/CDVD/CDVD.cpp +++ b/pcsx2/CDVD/CDVD.cpp @@ -37,7 +37,7 @@ wxString DiscSerial; static cdvdStruct cdvd; -static __forceinline void SetResultSize(u8 size) +static __fi void SetResultSize(u8 size) { cdvd.ResultC = size; cdvd.ResultP = 0; @@ -308,7 +308,7 @@ s32 cdvdWriteConfig(const u8* config) static MutexRecursive Mutex_NewDiskCB; // Sets ElfCRC to the CRC of the game bound to the CDVD plugin. -static __forceinline ElfObject* loadElf( const wxString filename ) +static __fi ElfObject* loadElf( const wxString filename ) { if (filename.StartsWith(L"host")) return new ElfObject(filename.After(':'), Path::GetFileSize(filename.After(':'))); @@ -338,7 +338,7 @@ static __forceinline ElfObject* loadElf( const wxString filename ) return new ElfObject(filename, file); } -static __forceinline void _reloadElfInfo(wxString elfpath) +static __fi void _reloadElfInfo(wxString elfpath) { ScopedPtr elfptr; @@ -417,7 +417,7 @@ void cdvdReloadElfInfo(wxString elfoverride) } } -static __forceinline s32 StrToS32(const wxString& str, int base = 10) +static __fi s32 StrToS32(const wxString& str, int base = 10) { long l; str.ToLong(&l, base); @@ -540,7 +540,7 @@ s32 cdvdGetTrayStatus() // cdvdNewDiskCB() can update it's status as well... // Modified by (efp) - 16/01/2006 -static __forceinline void cdvdGetDiskType() +static __fi void cdvdGetDiskType() { cdvd.Type = DoCDVDdetectDiskType(); } @@ -741,7 +741,7 @@ int cdvdReadSector() { } // inlined due to being referenced in only one place. 
-__forceinline void cdvdActionInterrupt() +__fi void cdvdActionInterrupt() { switch( cdvd.Action ) { @@ -786,7 +786,7 @@ __forceinline void cdvdActionInterrupt() } // inlined due to being referenced in only one place. -__forceinline void cdvdReadInterrupt() +__fi void cdvdReadInterrupt() { //Console.WriteLn("cdvdReadInterrupt %x %x %x %x %x", cpuRegs.interrupt, cdvd.Readed, cdvd.Reading, cdvd.nSectors, (HW_DMA3_BCR_H16 * HW_DMA3_BCR_L16) *4); @@ -983,7 +983,7 @@ void cdvdVsync() { cdvd.RTC.year = 0; } -static __forceinline u8 cdvdRead18(void) // SDATAOUT +static __fi u8 cdvdRead18(void) // SDATAOUT { u8 ret = 0; @@ -1348,7 +1348,7 @@ static void cdvdWrite04(u8 rt) { // NCOMMAND cdvd.ParamC = 0; } -static __forceinline void cdvdWrite05(u8 rt) { // NDATAIN +static __fi void cdvdWrite05(u8 rt) { // NDATAIN CDVD_LOG("cdvdWrite05(NDataIn) %x", rt); if (cdvd.ParamP < 32) { @@ -1357,12 +1357,12 @@ static __forceinline void cdvdWrite05(u8 rt) { // NDATAIN } } -static __forceinline void cdvdWrite06(u8 rt) { // HOWTO +static __fi void cdvdWrite06(u8 rt) { // HOWTO CDVD_LOG("cdvdWrite06(HowTo) %x", rt); cdvd.HowTo = rt; } -static __forceinline void cdvdWrite07(u8 rt) // BREAK +static __fi void cdvdWrite07(u8 rt) // BREAK { CDVD_LOG("cdvdWrite07(Break) %x", rt); @@ -1386,21 +1386,21 @@ static __forceinline void cdvdWrite07(u8 rt) // BREAK //cdvd.nCommand = 0; } -static __forceinline void cdvdWrite08(u8 rt) { // INTR_STAT +static __fi void cdvdWrite08(u8 rt) { // INTR_STAT CDVD_LOG("cdvdWrite08(IntrReason) = ACK(%x)", rt); cdvd.PwOff &= ~rt; } -static __forceinline void cdvdWrite0A(u8 rt) { // STATUS +static __fi void cdvdWrite0A(u8 rt) { // STATUS CDVD_LOG("cdvdWrite0A(Status) %x", rt); } -static __forceinline void cdvdWrite0F(u8 rt) { // TYPE +static __fi void cdvdWrite0F(u8 rt) { // TYPE CDVD_LOG("cdvdWrite0F(Type) %x", rt); DevCon.WriteLn("*PCSX2*: CDVD TYPE %x", rt); } -static __forceinline void cdvdWrite14(u8 rt) { // PS1 MODE?? 
+static __fi void cdvdWrite14(u8 rt) { // PS1 MODE?? u32 cycle = psxRegs.cycle; if (rt == 0xFE) @@ -1414,7 +1414,7 @@ static __forceinline void cdvdWrite14(u8 rt) { // PS1 MODE?? psxRegs.cycle = cycle; } -static __forceinline void fail_pol_cal() +static __fi void fail_pol_cal() { Console.Error("[MG] ERROR - Make sure the file is already decrypted!!!"); cdvd.Result[0] = 0x80; @@ -2025,7 +2025,7 @@ static void cdvdWrite16(u8 rt) // SCOMMAND cdvd.ParamC = 0; } -static __forceinline void cdvdWrite17(u8 rt) { // SDATAIN +static __fi void cdvdWrite17(u8 rt) { // SDATAIN CDVD_LOG("cdvdWrite17(SDataIn) %x", rt); if (cdvd.ParamP < 32) { @@ -2034,12 +2034,12 @@ static __forceinline void cdvdWrite17(u8 rt) { // SDATAIN } } -static __forceinline void cdvdWrite18(u8 rt) { // SDATAOUT +static __fi void cdvdWrite18(u8 rt) { // SDATAOUT CDVD_LOG("cdvdWrite18(SDataOut) %x", rt); Console.WriteLn("*PCSX2* SDATAOUT"); } -static __forceinline void cdvdWrite3A(u8 rt) { // DEC-SET +static __fi void cdvdWrite3A(u8 rt) { // DEC-SET CDVD_LOG("cdvdWrite3A(DecSet) %x", rt); cdvd.decSet = rt; Console.WriteLn("DecSet Write: %02X", cdvd.decSet); diff --git a/pcsx2/CDVD/CDVD.h b/pcsx2/CDVD/CDVD.h index 086c5b30e3..7ff20ab458 100644 --- a/pcsx2/CDVD/CDVD.h +++ b/pcsx2/CDVD/CDVD.h @@ -23,7 +23,7 @@ #define btoi(b) ((b)/16*10 + (b)%16) /* BCD to u_char */ #define itob(i) ((i)/10*16 + (i)%10) /* u_char to BCD */ -static __forceinline s32 msf_to_lsn(u8 *Time) +static __fi s32 msf_to_lsn(u8 *Time) { u32 lsn; @@ -33,7 +33,7 @@ static __forceinline s32 msf_to_lsn(u8 *Time) return lsn; } -static __forceinline s32 msf_to_lba(u8 m, u8 s, u8 f) +static __fi s32 msf_to_lba(u8 m, u8 s, u8 f) { u32 lsn; lsn = f; @@ -42,7 +42,7 @@ static __forceinline s32 msf_to_lba(u8 m, u8 s, u8 f) return lsn; } -static __forceinline void lsn_to_msf(u8 *Time, s32 lsn) +static __fi void lsn_to_msf(u8 *Time, s32 lsn) { u8 m, s, f; @@ -56,7 +56,7 @@ static __forceinline void lsn_to_msf(u8 *Time, s32 lsn) Time[2] = itob(f); } 
-static __forceinline void lba_to_msf(s32 lba, u8* m, u8* s, u8* f) +static __fi void lba_to_msf(s32 lba, u8* m, u8* s, u8* f) { lba += 150; *m = lba / (60 * 75); diff --git a/pcsx2/CDVD/CdRom.cpp b/pcsx2/CDVD/CdRom.cpp index 95da293bcf..f531b1477a 100644 --- a/pcsx2/CDVD/CdRom.cpp +++ b/pcsx2/CDVD/CdRom.cpp @@ -94,28 +94,28 @@ u32 cdReadTime;// = ((PSXCLK / 75) / BIAS); static void AddIrqQueue(u8 irq, u32 ecycle); -static __forceinline void StartReading(u32 type) { +static __fi void StartReading(u32 type) { cdr.Reading = type; cdr.FirstSector = 1; cdr.Readed = 0xff; AddIrqQueue(READ_ACK, 0x800); } -static __forceinline void StopReading() { +static __fi void StopReading() { if (cdr.Reading) { cdr.Reading = 0; psxRegs.interrupt &= ~(1<> 12]; } -static __forceinline u32 getMemW(s32 mem) +static __fi u32 getMemW(s32 mem) { return 0;//memLUTW[mem>>12]; } diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 822c3879eb..22a99ddc01 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -17,6 +17,15 @@ #include "Pcsx2Defs.h" +static const s64 _1mb = 0x100000; +static const s64 _8mb = _1mb * 8; +static const s64 _16mb = _1mb * 16; +static const s64 _256mb = _1mb * 256; +static const s64 _1gb = _256mb * 4; + +static const u32 BIAS = 2; // Bus is half of the actual ps2 speed +static const u32 PS2CLK = 294912000; //hz /* 294.912 mhz */ + #include "System.h" #include "Memory.h" #include "R5900.h" @@ -26,9 +35,6 @@ #include "SaveState.h" #include "DebugTools/Debug.h" -static const u32 BIAS = 2; // Bus is half of the actual ps2 speed -static const u32 PS2CLK = 294912000; //hz /* 294.912 mhz */ - extern wxString ShiftJIS_ConvertString( const char* src ); extern wxString ShiftJIS_ConvertString( const char* src, int maxlen ); diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index 38ec937d9c..e6d1d88092 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -54,7 +54,7 @@ void rcntReset(int index) { // Updates the state of the nextCounter value (if needed) to serve // any pending 
events for the given counter. // Call this method after any modifications to the state of a counter. -static __forceinline void _rcntSet( int cntidx ) +static __fi void _rcntSet( int cntidx ) { s32 c; jASSUME( cntidx <= 4 ); // rcntSet isn't valid for h/vsync counters. @@ -106,7 +106,7 @@ static __forceinline void _rcntSet( int cntidx ) } -static __forceinline void cpuRcntSet() +static __fi void cpuRcntSet() { int i; @@ -286,7 +286,7 @@ void frameLimitReset() // Framelimiter - Measures the delta time between calls and stalls until a // certain amount of time passes if such time hasn't passed yet. // See the GS FrameSkip function for details on why this is here and not in the GS. -static __forceinline void frameLimit() +static __fi void frameLimit() { // 999 means the user would rather just have framelimiting turned off... if( !EmuConfig.GS.FrameLimitEnable ) return; @@ -331,7 +331,7 @@ static __forceinline void frameLimit() // starting this frame, it'll just sleep longer the next to make up for it. 
:) } -static __forceinline void VSyncStart(u32 sCycle) +static __fi void VSyncStart(u32 sCycle) { GetCoreThread().VsyncInThread(); Cpu->CheckExecutionState(); @@ -380,7 +380,7 @@ static __forceinline void VSyncStart(u32 sCycle) // Should no longer be required (Refraction) } -static __forceinline void VSyncEnd(u32 sCycle) +static __fi void VSyncEnd(u32 sCycle) { EECNT_LOG( "///////// EE COUNTER VSYNC END (frame: %d) \\\\\\\\\\\\\\\\\\\\", g_FrameCount ); @@ -404,7 +404,7 @@ static u32 hsc=0; static int vblankinc = 0; #endif -__forceinline void rcntUpdate_hScanline() +__fi void rcntUpdate_hScanline() { if( !cpuTestCycle( hsyncCounter.sCycle, hsyncCounter.CycleT ) ) return; @@ -441,7 +441,7 @@ __forceinline void rcntUpdate_hScanline() } } -__forceinline void rcntUpdate_vSync() +__fi void rcntUpdate_vSync() { s32 diff = (cpuRegs.cycle - vsyncCounter.sCycle); if( diff < vsyncCounter.CycleT ) return; @@ -478,7 +478,7 @@ __forceinline void rcntUpdate_vSync() } } -static __forceinline void _cpuTestTarget( int i ) +static __fi void _cpuTestTarget( int i ) { if (counters[i].count < counters[i].target) return; @@ -497,7 +497,7 @@ static __forceinline void _cpuTestTarget( int i ) else counters[i].target |= EECNT_FUTURE_TARGET; } -static __forceinline void _cpuTestOverflow( int i ) +static __fi void _cpuTestOverflow( int i ) { if (counters[i].count <= 0xffff) return; @@ -516,7 +516,7 @@ static __forceinline void _cpuTestOverflow( int i ) // forceinline note: this method is called from two locations, but one // of them is the interpreter, which doesn't count. ;) So might as // well forceinline it! 
-__forceinline void rcntUpdate() +__fi void rcntUpdate() { rcntUpdate_vSync(); @@ -550,7 +550,7 @@ __forceinline void rcntUpdate() cpuRcntSet(); } -static __forceinline void _rcntSetGate( int index ) +static __fi void _rcntSetGate( int index ) { if (counters[index].mode.EnableGate) { @@ -575,7 +575,7 @@ static __forceinline void _rcntSetGate( int index ) } // mode - 0 means hblank source, 8 means vblank source. -__forceinline void rcntStartGate(bool isVblank, u32 sCycle) +__fi void rcntStartGate(bool isVblank, u32 sCycle) { int i; @@ -636,7 +636,7 @@ __forceinline void rcntStartGate(bool isVblank, u32 sCycle) } // mode - 0 means hblank signal, 8 means vblank signal. -__forceinline void rcntEndGate(bool isVblank , u32 sCycle) +__fi void rcntEndGate(bool isVblank , u32 sCycle) { int i; @@ -677,7 +677,7 @@ __forceinline void rcntEndGate(bool isVblank , u32 sCycle) // rcntUpdate, since we're being called from there anyway. } -__forceinline void rcntWmode(int index, u32 value) +__fi void rcntWmode(int index, u32 value) { if(counters[index].mode.IsCounting) { if(counters[index].mode.ClockSource != 0x3) { @@ -711,7 +711,7 @@ __forceinline void rcntWmode(int index, u32 value) _rcntSet( index ); } -__forceinline void rcntWcount(int index, u32 value) +__fi void rcntWcount(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeCount = %x, oldcount=%x, target=%x", index, value, counters[index].count, counters[index].target ); @@ -737,7 +737,7 @@ __forceinline void rcntWcount(int index, u32 value) _rcntSet( index ); } -__forceinline void rcntWtarget(int index, u32 value) +__fi void rcntWtarget(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeTarget = %x", index, value); @@ -766,13 +766,13 @@ __forceinline void rcntWtarget(int index, u32 value) _rcntSet( index ); } -__forceinline void rcntWhold(int index, u32 value) +__fi void rcntWhold(int index, u32 value) { EECNT_LOG("EE Counter[%d] Hold Write = %x", index, value); counters[index].hold = value; } -__forceinline u32 
rcntRcount(int index) +__fi u32 rcntRcount(int index) { u32 ret; @@ -787,7 +787,7 @@ __forceinline u32 rcntRcount(int index) return ret; } -__forceinline u32 rcntCycle(int index) +__fi u32 rcntCycle(int index) { if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3)) return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate); diff --git a/pcsx2/Dmac.h b/pcsx2/Dmac.h index f3432c7013..5c103b3212 100644 --- a/pcsx2/Dmac.h +++ b/pcsx2/Dmac.h @@ -215,9 +215,9 @@ union tDMA_QWC { wxString desc() const { return wxsFormat(L"QWC: 0x%x", _u32); } tDMA_TAG tag() { return (tDMA_TAG)_u32; } }; -static __forceinline void setDmacStat(u32 num); -static __forceinline tDMA_TAG *dmaGetAddr(u32 addr, bool write); -static __forceinline void throwBusError(const char *s); +static void setDmacStat(u32 num); +static tDMA_TAG *dmaGetAddr(u32 addr, bool write); +static void throwBusError(const char *s); struct DMACh { tDMA_CHCR chcr; @@ -374,7 +374,7 @@ union tDMAC_QUEUE bool empty() const { return (_u16 == 0); } }; -static __forceinline const wxChar* ChcrName(u32 addr) +static __fi const wxChar* ChcrName(u32 addr) { switch (addr) { @@ -393,7 +393,7 @@ static __forceinline const wxChar* ChcrName(u32 addr) } // Believe it or not, making this const can generate compiler warnings in gcc. 
-static __forceinline int ChannelNumber(u32 addr) +static __fi int ChannelNumber(u32 addr) { switch (addr) { @@ -607,19 +607,19 @@ struct INTCregisters #define dmacRegs ((DMACregisters*)(PS2MEM_HW+0xE000)) #define intcRegs ((INTCregisters*)(PS2MEM_HW+0xF000)) -static __forceinline void throwBusError(const char *s) +static __fi void throwBusError(const char *s) { Console.Error("%s BUSERR", s); dmacRegs->stat.BEIS = true; } -static __forceinline void setDmacStat(u32 num) +static __fi void setDmacStat(u32 num) { dmacRegs->stat.set_flags(1 << num); } // Note: Dma addresses are guaranteed to be aligned to 16 bytes (128 bits) -static __forceinline tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write) +static __fi tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write) { // if (addr & 0xf) { DMA_LOG("*PCSX2*: DMA address not 128bit aligned: %8.8x", addr); } @@ -653,7 +653,7 @@ static __forceinline tDMA_TAG *SPRdmaGetAddr(u32 addr, bool write) } // Note: Dma addresses are guaranteed to be aligned to 16 bytes (128 bits) -static __forceinline tDMA_TAG *dmaGetAddr(u32 addr, bool write) +static __ri tDMA_TAG *dmaGetAddr(u32 addr, bool write) { // if (addr & 0xf) { DMA_LOG("*PCSX2*: DMA address not 128bit aligned: %8.8x", addr); } if (DMA_TAG(addr).SPR) return (tDMA_TAG*)&psS[addr & 0x3ff0]; diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 27a8b1083d..d46137ee55 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -79,7 +79,7 @@ void gsReset() gsGIFReset(); } -static __forceinline void gsCSRwrite( const tGS_CSR& csr ) +static __fi void gsCSRwrite( const tGS_CSR& csr ) { if (csr.RESET) { @@ -137,7 +137,7 @@ static __forceinline void gsCSRwrite( const tGS_CSR& csr ) if(csr.EDWINT) CSRreg.EDWINT = false; } -static __forceinline void IMRwrite(u32 value) +static __fi void IMRwrite(u32 value) { GSIMR = (value & 0x1f00)|0x6000; @@ -161,7 +161,7 @@ static __forceinline void IMRwrite(u32 value) } } -__forceinline void gsWrite8(u32 mem, u8 value) +__fi void gsWrite8(u32 mem, u8 value) { switch (mem) { @@ -189,7 
+189,7 @@ __forceinline void gsWrite8(u32 mem, u8 value) GIF_LOG("GS write 8 at %8.8lx with data %8.8lx", mem, value); } -static __forceinline void _gsSMODEwrite( u32 mem, u32 value ) +static __fi void _gsSMODEwrite( u32 mem, u32 value ) { switch (mem) { @@ -206,7 +206,7 @@ static __forceinline void _gsSMODEwrite( u32 mem, u32 value ) ////////////////////////////////////////////////////////////////////////// // GS Write 16 bit -__forceinline void gsWrite16(u32 mem, u16 value) +__fi void gsWrite16(u32 mem, u16 value) { GIF_LOG("GS write 16 at %8.8lx with data %8.8lx", mem, value); @@ -236,7 +236,7 @@ __forceinline void gsWrite16(u32 mem, u16 value) ////////////////////////////////////////////////////////////////////////// // GS Write 32 bit -__forceinline void gsWrite32(u32 mem, u32 value) +__fi void gsWrite32(u32 mem, u32 value) { pxAssume( (mem & 3) == 0 ); GIF_LOG("GS write 32 at %8.8lx with data %8.8lx", mem, value); @@ -353,25 +353,25 @@ void __fastcall gsWrite128_generic( u32 mem, const mem128_t* value ) writeTo[1] = value[1]; } -__forceinline u8 gsRead8(u32 mem) +__fi u8 gsRead8(u32 mem) { GIF_LOG("GS read 8 from %8.8lx value: %8.8lx", mem, *(u8*)PS2GS_BASE(mem)); return *(u8*)PS2GS_BASE(mem); } -__forceinline u16 gsRead16(u32 mem) +__fi u16 gsRead16(u32 mem) { GIF_LOG("GS read 16 from %8.8lx value: %8.8lx", mem, *(u16*)PS2GS_BASE(mem)); return *(u16*)PS2GS_BASE(mem); } -__forceinline u32 gsRead32(u32 mem) +__fi u32 gsRead32(u32 mem) { GIF_LOG("GS read 32 from %8.8lx value: %8.8lx", mem, *(u32*)PS2GS_BASE(mem)); return *(u32*)PS2GS_BASE(mem); } -__forceinline u64 gsRead64(u32 mem) +__fi u64 gsRead64(u32 mem) { // fixme - PS2GS_BASE(mem+4) = (g_RealGSMem+(mem + 4 & 0x13ff)) GIF_LOG("GS read 64 from %8.8lx value: %8.8lx_%8.8lx", mem, *(u32*)PS2GS_BASE(mem+4), *(u32*)PS2GS_BASE(mem) ); @@ -402,7 +402,7 @@ void gsIrq() { // functions are performed by the EE, which itself uses thread sleep logic to avoid spin // waiting as much as possible (maximizes CPU resource 
availability for the GS). -__forceinline void gsFrameSkip() +__fi void gsFrameSkip() { static int consec_skipped = 0; static int consec_drawn = 0; diff --git a/pcsx2/GSState.cpp b/pcsx2/GSState.cpp index 551cbffe35..303bcb51b9 100644 --- a/pcsx2/GSState.cpp +++ b/pcsx2/GSState.cpp @@ -61,7 +61,7 @@ void GSGIFTRANSFER3(u32 *pMem, u32 size) { GSgifTransfer3(pMem, size); } -__forceinline void GSVSYNC(void) { +__fi void GSVSYNC(void) { if( g_SaveGSStream == 2 ) { u32 type = GSRUN_VSYNC; g_fGSSave->Freeze( type ); diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index f8165729d7..381dcff791 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -40,7 +40,7 @@ __aligned16 u8 Path1Buffer[0x1000000]; u32 Path1WritePos = 0; u32 Path1ReadPos = 0; -static __forceinline void clearFIFOstuff(bool full) +static __fi void clearFIFOstuff(bool full) { if (full) CSRreg.FIFO = CSR_FIFO_FULL; @@ -93,7 +93,7 @@ void gsPath1Interrupt() extern bool SIGNAL_IMR_Pending; -__forceinline void gsInterrupt() +__fi void gsInterrupt() { GIF_LOG("gsInterrupt: %8.8x", cpuRegs.cycle); @@ -182,7 +182,7 @@ int _GIFchain() return WRITERING_DMA(pMem, gif->qwc); } -static __forceinline void GIFchain() +static __fi void GIFchain() { // qwc check now done outside this function // Voodoocycles @@ -190,7 +190,7 @@ static __forceinline void GIFchain() /*if (gif->qwc)*/ gscycles+= ( _GIFchain() * BIAS); /* guessing */ } -static __forceinline bool checkTieBit(tDMA_TAG* &ptag) +static __fi bool checkTieBit(tDMA_TAG* &ptag) { if (gif->chcr.TIE && ptag->IRQ) { @@ -202,7 +202,7 @@ static __forceinline bool checkTieBit(tDMA_TAG* &ptag) return false; } -static __forceinline tDMA_TAG* ReadTag() +static __fi tDMA_TAG* ReadTag() { tDMA_TAG* ptag = dmaGetAddr(gif->tadr, false); //Set memory pointer to TADR @@ -215,7 +215,7 @@ static __forceinline tDMA_TAG* ReadTag() return ptag; } -static __forceinline tDMA_TAG* ReadTag2() +static __fi tDMA_TAG* ReadTag2() { tDMA_TAG* ptag = dmaGetAddr(gif->tadr, false); //Set memory pointer to TADR 
@@ -443,7 +443,7 @@ void dmaGIF() } // called from only one location, so forceinline it: -static __forceinline bool mfifoGIFrbTransfer() +static __fi bool mfifoGIFrbTransfer() { u32 mfifoqwc = min(gifqwc, (u32)gif->qwc); u32 *src; @@ -492,7 +492,7 @@ static __forceinline bool mfifoGIFrbTransfer() } // called from only one location, so forceinline it: -static __forceinline bool mfifoGIFchain() +static __fi bool mfifoGIFchain() { /* Is QWC = 0? if so there is nothing to transfer */ if (gif->qwc == 0) return true; diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index f11641dba8..4004de91ad 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -76,7 +76,7 @@ void hwReset() vif1Reset(); } -__forceinline void intcInterrupt() +__fi void intcInterrupt() { if ((psHu32(INTC_STAT)) == 0) { //DevCon.Warning("*PCSX2*: intcInterrupt already cleared"); @@ -97,7 +97,7 @@ __forceinline void intcInterrupt() cpuException(0x400, cpuRegs.branch); } -__forceinline void dmacInterrupt() +__fi void dmacInterrupt() { if( ((psHu16(DMAC_STAT + 2) & psHu16(DMAC_STAT)) == 0 ) && ( psHu16(DMAC_STAT) & 0x8000) == 0 ) @@ -130,7 +130,7 @@ void hwDmacIrq(int n) } // Write 'size' bytes to memory address 'addr' from 'data'. -__releaseinline bool hwMFIFOWrite(u32 addr, const u128* data, uint qwc) +__ri bool hwMFIFOWrite(u32 addr, const u128* data, uint qwc) { // all FIFO addresses should always be QWC-aligned. 
pxAssume((dmacRegs->rbor.ADDR & 15) == 0); @@ -158,7 +158,7 @@ __releaseinline bool hwMFIFOWrite(u32 addr, const u128* data, uint qwc) return true; } -__releaseinline bool hwDmacSrcChainWithStack(DMACh *dma, int id) { +__ri bool hwDmacSrcChainWithStack(DMACh *dma, int id) { switch (id) { case TAG_REFE: // Refe - Transfer Packet According to ADDR field //End Transfer diff --git a/pcsx2/HwRead.cpp b/pcsx2/HwRead.cpp index 0997bbb413..3563623327 100644 --- a/pcsx2/HwRead.cpp +++ b/pcsx2/HwRead.cpp @@ -21,7 +21,7 @@ using namespace R5900; -static __forceinline void IntCHackCheck() +static __fi void IntCHackCheck() { // Sanity check: To protect from accidentally "rewinding" the cyclecount // on the few times nextBranchCycle can be behind our current cycle. @@ -32,7 +32,7 @@ static __forceinline void IntCHackCheck() ///////////////////////////////////////////////////////////////////////// // Hardware READ 8 bit -__forceinline mem8_t hwRead8(u32 mem) +__fi mem8_t hwRead8(u32 mem) { u8 ret; @@ -150,7 +150,7 @@ __forceinline mem8_t hwRead8(u32 mem) ///////////////////////////////////////////////////////////////////////// // Hardware READ 16 bit -__forceinline mem16_t hwRead16(u32 mem) +__fi mem16_t hwRead16(u32 mem) { u16 ret; const u16 masked_mem = mem & 0xffff; @@ -293,7 +293,7 @@ mem32_t __fastcall hwRead32_page_01(u32 mem) // Reads hardware registers for page 15 (0x0F). // This is used internally to produce two inline versions, one with INTC_HACK, and one without. -static __forceinline mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) +static __fi mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) { // *Performance Warning* This function is called -A-LOT. Be wary when making changes. It // could impact FPS significantly. 
diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index 71887ae5a4..1b5ae2bcb4 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -58,7 +58,7 @@ static void StartQueuedDMA() if (QueuedDMA.SPR1) { DMA_LOG("Resuming DMA for SPR1"); QueuedDMA.SPR1 = !QuickDmaExec(dmaSPR1, D9_CHCR); } } -static _f void DmaExec( void (*func)(), u32 mem, u32 value ) +static __ri void DmaExec( void (*func)(), u32 mem, u32 value ) { DMACh *reg = &psH_DMACh(mem); tDMA_CHCR chcr(value); @@ -145,7 +145,7 @@ static _f void DmaExec( void (*func)(), u32 mem, u32 value ) // DmaExec8 should only be called for the second byte of CHCR. // Testing Note: dark cloud 2 uses 8 bit DMAs register writes. -static _f void DmaExec8( void (*func)(), u32 mem, u8 value ) +static __fi void DmaExec8( void (*func)(), u32 mem, u8 value ) { pxAssumeMsg( (mem & 0xf) == 1, "DmaExec8 should only be called for the second byte of CHCR" ); @@ -154,7 +154,7 @@ static _f void DmaExec8( void (*func)(), u32 mem, u8 value ) DmaExec( func, mem & ~0xf, (u32)value<<8 ); } -static _f void DmaExec16( void (*func)(), u32 mem, u16 value ) +static __fi void DmaExec16( void (*func)(), u32 mem, u16 value ) { DmaExec( func, mem, (u32)value ); } @@ -418,7 +418,7 @@ void hwWrite8(u32 mem, u8 value) } } -__forceinline void hwWrite16(u32 mem, u16 value) +__ri void hwWrite16(u32 mem, u16 value) { if( mem >= IPU_CMD && mem < D0_CHCR ) Console.Warning( "hwWrite16 to %x", mem ); diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 8bc73dbc66..5798ac7418 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -68,7 +68,7 @@ __aligned16 decoder_t decoder; __aligned16 u8 _readbits[80]; //local buffer (ring buffer) u8* readbits = _readbits; // always can decrement by one 1qw -__forceinline void IPUProcessInterrupt() +__fi void IPUProcessInterrupt() { if (ipuRegs->ctrl.BUSY && g_BP.IFC) IPUWorker(); } @@ -219,7 +219,7 @@ void tIPU_CMD_CSC::log_from_RGB32() const } -__forceinline u32 ipuRead32(u32 mem) +__fi u32 ipuRead32(u32 mem) { // 
Note: It's assumed that mem's input value is always in the 0x10002000 page // of memory (if not, it's probably bad code). @@ -255,7 +255,7 @@ __forceinline u32 ipuRead32(u32 mem) return *(u32*)(((u8*)ipuRegs) + mem); } -__forceinline u64 ipuRead64(u32 mem) +__fi u64 ipuRead64(u32 mem) { // Note: It's assumed that mem's input value is always in the 0x10002000 page // of memory (if not, it's probably bad code). @@ -307,7 +307,7 @@ void ipuSoftReset() //g_BP.bufferhasnew = 0; } -__forceinline void ipuWrite32(u32 mem, u32 value) +__fi void ipuWrite32(u32 mem, u32 value) { // Note: It's assumed that mem's input value is always in the 0x10002000 page // of memory (if not, it's probably bad code). @@ -346,7 +346,7 @@ __forceinline void ipuWrite32(u32 mem, u32 value) } } -__forceinline void ipuWrite64(u32 mem, u64 value) +__fi void ipuWrite64(u32 mem, u64 value) { // Note: It's assumed that mem's input value is always in the 0x10002000 page // of memory (if not, it's probably bad code). @@ -420,7 +420,7 @@ static BOOL ipuIDEC(u32 val, bool resume) static int s_bdec = 0; -static __forceinline BOOL ipuBDEC(u32 val, bool resume) +static __fi BOOL ipuBDEC(u32 val, bool resume) { tIPU_CMD_BDEC bdec(val); @@ -514,7 +514,7 @@ static BOOL __fastcall ipuVDEC(u32 val) return FALSE; } -static __forceinline BOOL ipuFDEC(u32 val) +static __fi BOOL ipuFDEC(u32 val) { if (!getBits32((u8*)&ipuRegs->cmd.DATA, 0)) return FALSE; @@ -691,7 +691,7 @@ static void ipuSETTH(u32 val) /////////////////////// // IPU Worker Thread // /////////////////////// -__forceinline void IPU_INTERRUPT() //dma +__fi void IPU_INTERRUPT() //dma { hwIntcIrq(INTC_IPU); } @@ -901,7 +901,7 @@ void IPUWorker() // Buffer reader // move the readbits queue -__forceinline void inc_readbits() +__fi void inc_readbits() { readbits += 16; if (readbits >= _readbits + 64) @@ -914,7 +914,7 @@ __forceinline void inc_readbits() } // returns the pointer of readbits moved by 1 qword -__forceinline u8* next_readbits() +__fi u8* 
next_readbits() { return readbits + 16; } @@ -1070,7 +1070,7 @@ u8 __fastcall getBits32(u8 *address, u32 advance) return 1; } -__forceinline u8 __fastcall getBits16(u8 *address, u32 advance) +__fi u8 __fastcall getBits16(u8 *address, u32 advance) { u32 mask; u8* readpos; @@ -1127,7 +1127,7 @@ u8 __fastcall getBits8(u8 *address, u32 advance) void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V, const int Src_BpS, const int Width, const int Height); -__forceinline void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) +__fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) { int i; u8* p = (u8*)&rgb32; @@ -1161,7 +1161,7 @@ __forceinline void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) } } -__forceinline void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte) +__fi void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte) { int i, j; for (i = 0; i < 16; ++i) @@ -1176,12 +1176,12 @@ __forceinline void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& r } } -__forceinline void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) +__fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) { Console.Error("IPU: VQ not implemented"); } -__forceinline void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16) +__fi void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16) { const u8 *s = (const u8*)&mb8; s16 *d = (s16*)&mb16; @@ -1193,7 +1193,7 @@ __forceinline void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16) -static __forceinline bool ipuDmacPartialChain(tDMA_TAG tag) +static __fi bool ipuDmacPartialChain(tDMA_TAG tag) { switch (tag.ID) { @@ -1211,7 +1211,7 @@ static __forceinline bool ipuDmacPartialChain(tDMA_TAG tag) extern void gsInterrupt(); extern void vif1Interrupt(); -static __forceinline void ipuDmacSrcChain() +static __fi void ipuDmacSrcChain() { switch (IPU1Status.ChainMode) @@ -1243,7 +1243,7 @@ static __forceinline void 
ipuDmacSrcChain() } } -static __forceinline bool WaitGSPaths() +static __fi bool WaitGSPaths() { if(CHECK_IPUWAITHACK) { @@ -1268,7 +1268,7 @@ static __forceinline bool WaitGSPaths() return true; } -static __forceinline int IPU1chain() { +static __fi int IPU1chain() { int totalqwc = 0; @@ -1304,7 +1304,7 @@ static __forceinline int IPU1chain() { return totalqwc; } -//static __forceinline bool WaitGSPaths() +//static __fi bool WaitGSPaths() //{ // //Wait for all GS paths to be clear // if (GSTransferStatus._u32 != 0x2a) @@ -1524,7 +1524,7 @@ int IPU0dma() return readsize; } -__forceinline void dmaIPU0() // fromIPU +__fi void dmaIPU0() // fromIPU { if (ipu0dma->pad != 0) { @@ -1539,7 +1539,7 @@ __forceinline void dmaIPU0() // fromIPU if (ipuRegs->ctrl.BUSY) IPUWorker(); } -__forceinline void dmaIPU1() // toIPU +__fi void dmaIPU1() // toIPU { IPU_LOG("IPU1DMAStart QWC %x, MADR %x, CHCR %x, TADR %x", ipu1dma->qwc, ipu1dma->madr, ipu1dma->chcr._u32, ipu1dma->tadr); diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index 6b403f1320..ad9b14e20b 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -27,7 +27,7 @@ #define IPU_INT_TO( cycles ) if(!(cpuRegs.interrupt & (1<<4))) CPU_INT( DMAC_TO_IPU, cycles ) #define IPU_INT_FROM( cycles ) CPU_INT( DMAC_FROM_IPU, cycles ) -#define IPU_FORCEINLINE __forceinline +#define IPU_FORCEINLINE __fi struct IPUStatus { bool InProgress; diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp index e1467724e2..91d29ff1f9 100644 --- a/pcsx2/IPU/IPU_Fifo.cpp +++ b/pcsx2/IPU/IPU_Fifo.cpp @@ -168,7 +168,7 @@ void IPU_Fifo_Output::readsingle(void *value) } } -__forceinline bool decoder_t::ReadIpuData(u128* out) +__fi bool decoder_t::ReadIpuData(u128* out) { if(decoder.ipu0_data == 0) return false; _mm_store_ps((float*)out, _mm_load_ps((float*)GetIpuDataPtr())); diff --git a/pcsx2/IPU/mpeg2lib/Idct.cpp b/pcsx2/IPU/mpeg2lib/Idct.cpp index 69daaa89b6..82492b2545 100644 --- a/pcsx2/IPU/mpeg2lib/Idct.cpp +++ b/pcsx2/IPU/mpeg2lib/Idct.cpp @@ -66,7 
+66,7 @@ do { \ } while (0) #endif -static __forceinline void idct_row (s16 * const block) +static __fi void idct_row (s16 * const block) { int d0, d1, d2, d3; int a0, a1, a2, a3, b0, b1, b2, b3; @@ -119,7 +119,7 @@ static __forceinline void idct_row (s16 * const block) block[7] = (a0 - b0) >> 8; } -static __forceinline void idct_col (s16 * const block) +static __fi void idct_col (s16 * const block) { int d0, d1, d2, d3; int a0, a1, a2, a3, b0, b1, b2, b3; @@ -160,7 +160,7 @@ static __forceinline void idct_col (s16 * const block) block[8*7] = (a0 - b0) >> 17; } -__releaseinline void mpeg2_idct_copy(s16 * block, u8 * dest, const int stride) +__ri void mpeg2_idct_copy(s16 * block, u8 * dest, const int stride) { int i; @@ -189,7 +189,7 @@ __releaseinline void mpeg2_idct_copy(s16 * block, u8 * dest, const int stride) // stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]). -__releaseinline void mpeg2_idct_add (const int last, s16 * block, s16 * dest, const int stride) +__ri void mpeg2_idct_add (const int last, s16 * block, s16 * dest, const int stride) { // on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0). 
diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp b/pcsx2/IPU/mpeg2lib/Mpeg.cpp index 9fbc48c155..dddb64cfad 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp +++ b/pcsx2/IPU/mpeg2lib/Mpeg.cpp @@ -164,7 +164,7 @@ intra: } } -static __forceinline int get_quantizer_scale() +static __fi int get_quantizer_scale() { int quantizer_scale_code; @@ -176,7 +176,7 @@ static __forceinline int get_quantizer_scale() return quantizer_scale_code << 1; } -static __forceinline int get_coded_block_pattern() +static __fi int get_coded_block_pattern() { const CBPtab * tab; u16 code = UBITS(16); @@ -190,7 +190,7 @@ static __forceinline int get_coded_block_pattern() return tab->cbp; } -int __forceinline get_motion_delta(const int f_code) +int __fi get_motion_delta(const int f_code) { int delta; int sign; @@ -219,7 +219,7 @@ int __forceinline get_motion_delta(const int f_code) return (delta ^ sign) - sign; } -int __forceinline get_dmv() +int __fi get_dmv() { const DMVtab * tab; @@ -261,7 +261,7 @@ int get_macroblock_address_increment() return mba->mba + 1; } -static __forceinline int get_luma_dc_dct_diff() +static __fi int get_luma_dc_dct_diff() { int size; int dc_diff; @@ -297,7 +297,7 @@ static __forceinline int get_luma_dc_dct_diff() return dc_diff; } -static __forceinline int get_chroma_dc_dct_diff() +static __fi int get_chroma_dc_dct_diff() { int size; int dc_diff; @@ -336,7 +336,7 @@ do { \ val = (((s32)val) >> 31) ^ 2047; \ } while (0) -static __forceinline bool get_intra_block() +static __fi bool get_intra_block() { int i; int j; @@ -488,7 +488,7 @@ static __forceinline bool get_intra_block() return true; } -static __forceinline bool get_non_intra_block(int * last) +static __fi bool get_non_intra_block(int * last) { int i; int j; @@ -629,7 +629,7 @@ static __forceinline bool get_non_intra_block(int * last) return true; } -static __forceinline bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) +static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const 
int stride, const bool skip) { if (!skip || ipu_cmd.pos[3]) { @@ -659,7 +659,7 @@ static __forceinline bool slice_intra_DCT(const int cc, u8 * const dest, const i return true; } -static __forceinline bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) +static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) { int last; @@ -678,7 +678,7 @@ static __forceinline bool slice_non_intra_DCT(s16 * const dest, const int stride return true; } -void __forceinline finishmpeg2sliceIDEC() +void __fi finishmpeg2sliceIDEC() { ipuRegs->ctrl.SCD = 0; coded_block_pattern = decoder.coded_block_pattern; diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h index a25c7f00b7..0b30d1b8bb 100644 --- a/pcsx2/IPU/mpeg2lib/Vlc.h +++ b/pcsx2/IPU/mpeg2lib/Vlc.h @@ -34,7 +34,7 @@ //static u8 dword[8]; //static u8 qword[16]; -static __forceinline int GETWORD() +static __fi int GETWORD() { static u8 data[2]; @@ -56,7 +56,7 @@ static __forceinline int GETWORD() return 1; } -static __forceinline int bitstream_init () +static __fi int bitstream_init () { if (!getBits32((u8*)&decoder.bitstream_buf, 1)) { @@ -72,7 +72,7 @@ static __forceinline int bitstream_init () } /* remove num valid bits from bit_buf */ -static __forceinline void DUMPBITS(int num) +static __fi void DUMPBITS(int num) { decoder.bitstream_buf <<= num; decoder.bitstream_bits += num; @@ -85,7 +85,7 @@ static __forceinline void DUMPBITS(int num) #define SBITS(num) (((s32)decoder.bitstream_buf) >> (32 - (num))) /* Get bits from bitstream */ -static __forceinline u32 GETBITS(int num) +static __fi u32 GETBITS(int num) { u16 retVal = UBITS(num); DUMPBITS(num); diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 8ccd15a197..ea6d14b7ef 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -112,7 +112,7 @@ static const __aligned16 SSE2_Tables sse2_tables = static __aligned16 u16 yuv2rgb_temp[3][8]; // This could potentially be improved for SSE4 
-__releaseinline void yuv2rgb_sse2(void) +__ri void yuv2rgb_sse2(void) { #if defined(_MSC_VER) || defined(__INTEL_COMPILER) __asm { diff --git a/pcsx2/Interpreter.cpp b/pcsx2/Interpreter.cpp index 68f115007b..cb7e46c283 100644 --- a/pcsx2/Interpreter.cpp +++ b/pcsx2/Interpreter.cpp @@ -77,7 +77,7 @@ static void execI() opcode.interpret(); } -static __forceinline void _doBranch_shared(u32 tar) +static __fi void _doBranch_shared(u32 tar) { branch2 = cpuRegs.branch = 1; execI(); diff --git a/pcsx2/IopBios.cpp b/pcsx2/IopBios.cpp index 4a86176278..1912696ee1 100644 --- a/pcsx2/IopBios.cpp +++ b/pcsx2/IopBios.cpp @@ -107,7 +107,7 @@ public: fd = hostfd; } - static __forceinline int translate_error(int err) + static __fi int translate_error(int err) { if (err >= 0) return err; diff --git a/pcsx2/IopCounters.cpp b/pcsx2/IopCounters.cpp index b1696f9e98..ebd49f8d29 100644 --- a/pcsx2/IopCounters.cpp +++ b/pcsx2/IopCounters.cpp @@ -194,7 +194,7 @@ static void __fastcall _rcntTestTarget( int i ) } -static __forceinline void _rcntTestOverflow( int i ) +static __fi void _rcntTestOverflow( int i ) { u64 maxTarget = ( i < 3 ) ? 
0xffff : 0xfffffffful; if( psxCounters[i].count <= maxTarget ) return; @@ -538,7 +538,7 @@ void psxRcntWcount32(int index, u32 value) ////////////////////////////////////////////////////////////////////////////////////////// // -__forceinline void psxRcntWmode16( int index, u32 value ) +__fi void psxRcntWmode16( int index, u32 value ) { PSXCNT_LOG( "IOP Counter[%d] writeMode = 0x%04X", index, value ); @@ -599,7 +599,7 @@ __forceinline void psxRcntWmode16( int index, u32 value ) ////////////////////////////////////////////////////////////////////////////////////////// // -__forceinline void psxRcntWmode32( int index, u32 value ) +__fi void psxRcntWmode32( int index, u32 value ) { PSXCNT_LOG( "IOP Counter[%d] writeMode = 0x%04x", index, value ); diff --git a/pcsx2/IopDma.cpp b/pcsx2/IopDma.cpp index c8525e86a1..238300924f 100644 --- a/pcsx2/IopDma.cpp +++ b/pcsx2/IopDma.cpp @@ -261,10 +261,10 @@ struct DmaHandlerInfo DmaIHandler Interrupt; DmaSHandler Start; - __forceinline u32& REG_MADR(void) const { return psxHu32(DmacRegisterBase + 0x0); } - __forceinline u32& REG_BCR(void) const { return psxHu32(DmacRegisterBase + 0x4); } - __forceinline u32& REG_CHCR(void) const { return psxHu32(DmacRegisterBase + 0x8); } - __forceinline u32& REG_TADR(void) const { return psxHu32(DmacRegisterBase + 0xC); } + __fi u32& REG_MADR(void) const { return psxHu32(DmacRegisterBase + 0x0); } + __fi u32& REG_BCR(void) const { return psxHu32(DmacRegisterBase + 0x4); } + __fi u32& REG_CHCR(void) const { return psxHu32(DmacRegisterBase + 0x8); } + __fi u32& REG_TADR(void) const { return psxHu32(DmacRegisterBase + 0xC); } }; #define MEM_BASE1 0x1f801080 @@ -452,7 +452,7 @@ void IopDmaStart(int channel) // IopDmaProcessChannel: Called from IopDmaUpdate (below) to process a dma channel template -static void __releaseinline IopDmaProcessChannel(int elapsed, int& MinDelay) +static void __ri IopDmaProcessChannel(int elapsed, int& MinDelay) { // Hopefully the compiler would be able to optimize the 
whole function away if this doesn't pass. if(!(IopDmaHandlers[channel].DirectionFlags&_E__)) diff --git a/pcsx2/IopHw.cpp b/pcsx2/IopHw.cpp index 69d8a9bcab..37dcbc7520 100644 --- a/pcsx2/IopHw.cpp +++ b/pcsx2/IopHw.cpp @@ -37,7 +37,7 @@ void psxHwReset() { //sio2Reset(); } -__forceinline u8 psxHw4Read8(u32 add) +__fi u8 psxHw4Read8(u32 add) { u16 mem = add & 0xFF; u8 ret = cdvdRead(mem); @@ -45,7 +45,7 @@ __forceinline u8 psxHw4Read8(u32 add) return ret; } -__forceinline void psxHw4Write8(u32 add, u8 value) +__fi void psxHw4Write8(u32 add, u8 value) { u8 mem = (u8)add; // only lower 8 bits are relevant (cdvd regs mirror across the page) cdvdWrite(mem, value); diff --git a/pcsx2/IopMem.h b/pcsx2/IopMem.h index 5047015205..6e1f6c77cf 100644 --- a/pcsx2/IopMem.h +++ b/pcsx2/IopMem.h @@ -30,7 +30,7 @@ extern const uptr *psxMemRLUT; // Hacky! This should really never be used, ever, since it bypasses the iop's Hardware // Register handler and SPU/DEV/USB maps. template -static __forceinline T* iopVirtMemW( u32 mem ) +static __fi T* iopVirtMemW( u32 mem ) { return (psxMemWLUT[(mem) >> 16] == 0) ? NULL : (T*)(psxMemWLUT[(mem) >> 16] + ((mem) & 0xffff)); } @@ -42,14 +42,14 @@ static __forceinline T* iopVirtMemW( u32 mem ) // TLB should be using iopMemRead/Write instead for each individual access. That ensures // correct handling of page boundary crossings. template -static __forceinline const T* iopVirtMemR( u32 mem ) +static __fi const T* iopVirtMemR( u32 mem ) { mem &= 0x1fffffff; return (psxMemRLUT[mem >> 16] == 0) ? 
NULL : (const T*)(psxMemRLUT[mem >> 16] + (mem & 0xffff)); } // Obtains a pointer to the IOP's physical mapping (bypasses the TLB) -static __forceinline u8* iopPhysMem( u32 addr ) +static __fi u8* iopPhysMem( u32 addr ) { return &psxM[addr & 0x1fffff]; } diff --git a/pcsx2/MMI.cpp b/pcsx2/MMI.cpp index f9e9ab2e10..53b9f3d6ba 100644 --- a/pcsx2/MMI.cpp +++ b/pcsx2/MMI.cpp @@ -145,7 +145,7 @@ namespace MMI { //*****************MMI OPCODES********************************* -__forceinline void _PLZCW(int n) +static __fi void _PLZCW(int n) { // This function counts the number of "like" bits in the source register, starting // with the MSB and working its way down, and returns the result MINUS ONE. @@ -171,7 +171,7 @@ void PLZCW() { _PLZCW (1); } -__forceinline void PMFHL_CLAMP(u16 dst, u16 src) +__fi void PMFHL_CLAMP(u16 dst, u16 src) { if ((int)src > (int)0x00007fff) dst = 0x7fff; @@ -254,7 +254,7 @@ void PMTHL() { cpuRegs.HI.UL[2] = cpuRegs.GPR.r[_Rs_].UL[3]; } -__forceinline void _PSLLH(int n) +static __fi void _PSLLH(int n) { cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rt_].US[n] << ( _Sa_ & 0xf ); } @@ -266,7 +266,7 @@ void PSLLH() { _PSLLH(4); _PSLLH(5); _PSLLH(6); _PSLLH(7); } -__forceinline void _PSRLH(int n) +static __fi void _PSRLH(int n) { cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rt_].US[n] >> ( _Sa_ & 0xf ); } @@ -278,7 +278,7 @@ void PSRLH () { _PSRLH(4); _PSRLH(5); _PSRLH(6); _PSRLH(7); } -__forceinline void _PSRAH(int n) +static __fi void _PSRAH(int n) { cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rt_].SS[n] >> ( _Sa_ & 0xf ); } @@ -290,7 +290,7 @@ void PSRAH() { _PSRAH(4); _PSRAH(5); _PSRAH(6); _PSRAH(7); } -__forceinline void _PSLLW(int n) +static __fi void _PSLLW(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = cpuRegs.GPR.r[_Rt_].UL[n] << _Sa_; } @@ -301,7 +301,7 @@ void PSLLW() { _PSLLW(0); _PSLLW(1); _PSLLW(2); _PSLLW(3); } -__forceinline void _PSRLW(int n) +static __fi void _PSRLW(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = cpuRegs.GPR.r[_Rt_].UL[n] >> _Sa_; } @@ 
-312,7 +312,7 @@ void PSRLW() { _PSRLW(0); _PSRLW(1); _PSRLW(2); _PSRLW(3); } -__forceinline void _PSRAW(int n) +static __fi void _PSRAW(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = cpuRegs.GPR.r[_Rt_].SL[n] >> _Sa_; } @@ -326,7 +326,7 @@ void PSRAW() { //*****************END OF MMI OPCODES************************** //*************************MMI0 OPCODES************************ -__forceinline void _PADDW(int n) +static __fi void _PADDW(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = cpuRegs.GPR.r[_Rs_].UL[n] + cpuRegs.GPR.r[_Rt_].UL[n]; } @@ -337,7 +337,7 @@ void PADDW() { _PADDW(0); _PADDW(1); _PADDW(2); _PADDW(3); } -__forceinline void _PSUBW(int n) +static __fi void _PSUBW(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = cpuRegs.GPR.r[_Rs_].UL[n] - cpuRegs.GPR.r[_Rt_].UL[n]; } @@ -348,7 +348,7 @@ void PSUBW() { _PSUBW(0); _PSUBW(1); _PSUBW(2); _PSUBW(3); } -__forceinline void _PCGTW(int n) +static __fi void _PCGTW(int n) { if (cpuRegs.GPR.r[_Rs_].SL[n] > cpuRegs.GPR.r[_Rt_].SL[n]) cpuRegs.GPR.r[_Rd_].UL[n] = 0xFFFFFFFF; @@ -362,7 +362,7 @@ void PCGTW() { _PCGTW(0); _PCGTW(1); _PCGTW(2); _PCGTW(3); } -__forceinline void _PMAXW(int n) +static __fi void _PMAXW(int n) { if (cpuRegs.GPR.r[_Rs_].SL[n] > cpuRegs.GPR.r[_Rt_].SL[n]) cpuRegs.GPR.r[_Rd_].UL[n] = cpuRegs.GPR.r[_Rs_].UL[n]; @@ -376,7 +376,7 @@ void PMAXW() { _PMAXW(0); _PMAXW(1); _PMAXW(2); _PMAXW(3); } -__forceinline void _PADDH(int n) +static __fi void _PADDH(int n) { cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rs_].US[n] + cpuRegs.GPR.r[_Rt_].US[n]; } @@ -388,7 +388,7 @@ void PADDH() { _PADDH(4); _PADDH(5); _PADDH(6); _PADDH(7); } -__forceinline void _PSUBH(int n) +static __fi void _PSUBH(int n) { cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rs_].US[n] - cpuRegs.GPR.r[_Rt_].US[n]; } @@ -400,7 +400,7 @@ void PSUBH() { _PSUBH(4); _PSUBH(5); _PSUBH(6); _PSUBH(7); } -__forceinline void _PCGTH(int n) +static __fi void _PCGTH(int n) { if (cpuRegs.GPR.r[_Rs_].SS[n] > cpuRegs.GPR.r[_Rt_].SS[n]) cpuRegs.GPR.r[_Rd_].US[n] = 0xFFFF; @@ -415,7 
+415,7 @@ void PCGTH() { _PCGTH(4); _PCGTH(5); _PCGTH(6); _PCGTH(7); } -__forceinline void _PMAXH(int n) +static __fi void _PMAXH(int n) { if (cpuRegs.GPR.r[_Rs_].SS[n] > cpuRegs.GPR.r[_Rt_].SS[n]) cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rs_].US[n]; @@ -430,7 +430,7 @@ void PMAXH() { _PMAXH(4); _PMAXH(5); _PMAXH(6); _PMAXH(7); } -__forceinline void _PADDB(int n) +static __fi void _PADDB(int n) { cpuRegs.GPR.r[_Rd_].SC[n] = cpuRegs.GPR.r[_Rs_].SC[n] + cpuRegs.GPR.r[_Rt_].SC[n]; } @@ -443,7 +443,7 @@ void PADDB() { _PADDB( i ); } -__forceinline void _PSUBB(int n) +static __fi void _PSUBB(int n) { cpuRegs.GPR.r[_Rd_].SC[n] = cpuRegs.GPR.r[_Rs_].SC[n] - cpuRegs.GPR.r[_Rt_].SC[n]; } @@ -456,7 +456,7 @@ void PSUBB() { _PSUBB( i ); } -__forceinline void _PCGTB(int n) +static __fi void _PCGTB(int n) { if (cpuRegs.GPR.r[_Rs_].SC[n] > cpuRegs.GPR.r[_Rt_].SC[n]) cpuRegs.GPR.r[_Rd_].UC[n] = 0xFF; @@ -472,7 +472,7 @@ void PCGTB() { _PCGTB( i ); } -__forceinline void _PADDSW(int n) +static __fi void _PADDSW(int n) { s64 sTemp64; @@ -491,7 +491,7 @@ void PADDSW() { _PADDSW(0); _PADDSW(1); _PADDSW(2); _PADDSW(3); } -__forceinline void _PSUBSW(int n) +static __fi void _PSUBSW(int n) { s64 sTemp64; @@ -538,7 +538,7 @@ void PPACW() { cpuRegs.GPR.r[_Rd_].UL[3] = Rs.UL[2]; } -__forceinline void _PADDSH(int n) +__fi void _PADDSH(int n) { s32 sTemp32; sTemp32 = (s32)cpuRegs.GPR.r[_Rs_].SS[n] + (s32)cpuRegs.GPR.r[_Rt_].SS[n]; @@ -558,7 +558,7 @@ void PADDSH() { _PADDSH(4); _PADDSH(5); _PADDSH(6); _PADDSH(7); } -__forceinline void _PSUBSH(int n) +__fi void _PSUBSH(int n) { s32 sTemp32; sTemp32 = (s32)cpuRegs.GPR.r[_Rs_].SS[n] - (s32)cpuRegs.GPR.r[_Rt_].SS[n]; @@ -610,7 +610,7 @@ void PPACH() { cpuRegs.GPR.r[_Rd_].US[7] = Rs.US[6]; } -__forceinline void _PADDSB(int n) +__fi void _PADDSB(int n) { s16 sTemp16; sTemp16 = (s16)cpuRegs.GPR.r[_Rs_].SC[n] + (s16)cpuRegs.GPR.r[_Rt_].SC[n]; @@ -631,7 +631,7 @@ void PADDSB() { _PADDSB(i); } -static __forceinline void _PSUBSB( u8 n ) +static __fi 
void _PSUBSB( u8 n ) { s16 sTemp16; sTemp16 = (s16)cpuRegs.GPR.r[_Rs_].SC[n] - (s16)cpuRegs.GPR.r[_Rt_].SC[n]; @@ -706,7 +706,7 @@ void PPACB() { cpuRegs.GPR.r[_Rd_].UC[15] = Rs.UC[14]; } -__forceinline void _PEXT5(int n) +__fi void _PEXT5(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = ((cpuRegs.GPR.r[_Rt_].UL[n] & 0x0000001F) << 3) | @@ -721,7 +721,7 @@ void PEXT5() { _PEXT5(0); _PEXT5(1); _PEXT5(2); _PEXT5(3); } -__forceinline void _PPAC5(int n) +__fi void _PPAC5(int n) { cpuRegs.GPR.r[_Rd_].UL[n] = ((cpuRegs.GPR.r[_Rt_].UL[n] >> 3) & 0x0000001F) | @@ -739,7 +739,7 @@ void PPAC5() { //***END OF MMI0 OPCODES****************************************** //**********MMI1 OPCODES************************************** -__forceinline void _PABSW(int n) +static __fi void _PABSW(int n) { if (cpuRegs.GPR.r[_Rt_].UL[n] == 0x80000000) cpuRegs.GPR.r[_Rd_].UL[n] = 0x7fffffff; //clamp @@ -755,7 +755,7 @@ void PABSW() { _PABSW(0); _PABSW(1); _PABSW(2); _PABSW(3); } -__forceinline void _PCEQW(int n) +static __fi void _PCEQW(int n) { if (cpuRegs.GPR.r[_Rs_].UL[n] == cpuRegs.GPR.r[_Rt_].UL[n]) cpuRegs.GPR.r[_Rd_].UL[n] = 0xFFFFFFFF; @@ -769,7 +769,7 @@ void PCEQW() { _PCEQW(0); _PCEQW(1); _PCEQW(2); _PCEQW(3); } -static __forceinline void _PMINW( u8 n ) +static __fi void _PMINW( u8 n ) { if (cpuRegs.GPR.r[_Rs_].SL[n] < cpuRegs.GPR.r[_Rt_].SL[n]) cpuRegs.GPR.r[_Rd_].SL[n] = cpuRegs.GPR.r[_Rs_].SL[n]; @@ -790,7 +790,7 @@ void PADSBH() { _PADDH(4); _PADDH(5); _PADDH(6); _PADDH(7); } -__forceinline void _PABSH(int n) +static __fi void _PABSH(int n) { if (cpuRegs.GPR.r[_Rt_].US[n] == 0x8000) cpuRegs.GPR.r[_Rd_].US[n] = 0x7fff; //clamp @@ -807,7 +807,7 @@ void PABSH() { _PABSH(4); _PABSH(5); _PABSH(6); _PABSH(7); } -static __forceinline void _PCEQH( u8 n ) +static __fi void _PCEQH( u8 n ) { if (cpuRegs.GPR.r[_Rs_].US[n] == cpuRegs.GPR.r[_Rt_].US[n]) cpuRegs.GPR.r[_Rd_].US[n] = 0xFFFF; @@ -822,7 +822,7 @@ void PCEQH() { _PCEQH(4); _PCEQH(5); _PCEQH(6); _PCEQH(7); } -static __forceinline void _PMINH( 
u8 n ) +static __fi void _PMINH( u8 n ) { if (cpuRegs.GPR.r[_Rs_].SS[n] < cpuRegs.GPR.r[_Rt_].SS[n]) cpuRegs.GPR.r[_Rd_].US[n] = cpuRegs.GPR.r[_Rs_].US[n]; @@ -837,7 +837,7 @@ void PMINH() { _PMINH(4); _PMINH(5); _PMINH(6); _PMINH(7); } -__forceinline void _PCEQB(int n) +__fi void _PCEQB(int n) { if (cpuRegs.GPR.r[_Rs_].UC[n] == cpuRegs.GPR.r[_Rt_].UC[n]) cpuRegs.GPR.r[_Rd_].UC[n] = 0xFF; @@ -853,7 +853,7 @@ void PCEQB() { _PCEQB(i); } -__forceinline void _PADDUW(int n) +__fi void _PADDUW(int n) { s64 tmp; tmp = (s64)cpuRegs.GPR.r[_Rs_].UL[n] + (s64)cpuRegs.GPR.r[_Rt_].UL[n]; @@ -870,7 +870,7 @@ void PADDUW () { _PADDUW(0); _PADDUW(1); _PADDUW(2); _PADDUW(3); } -__forceinline void _PSUBUW(int n) +__fi void _PSUBUW(int n) { s64 sTemp64; sTemp64 = (s64)cpuRegs.GPR.r[_Rs_].UL[n] - (s64)cpuRegs.GPR.r[_Rt_].UL[n]; @@ -899,7 +899,7 @@ void PEXTUW() { cpuRegs.GPR.r[_Rd_].UL[3] = Rs.UL[3]; } -__forceinline void _PADDUH(int n) +__fi void _PADDUH(int n) { s32 sTemp32; sTemp32 = (s32)cpuRegs.GPR.r[_Rs_].US[n] + (s32)cpuRegs.GPR.r[_Rt_].US[n]; @@ -917,7 +917,7 @@ void PADDUH() { _PADDUH(4); _PADDUH(5); _PADDUH(6); _PADDUH(7); } -__forceinline void _PSUBUH(int n) +__fi void _PSUBUH(int n) { s32 sTemp32; sTemp32 = (s32)cpuRegs.GPR.r[_Rs_].US[n] - (s32)cpuRegs.GPR.r[_Rt_].US[n]; @@ -952,7 +952,7 @@ void PEXTUH() { cpuRegs.GPR.r[_Rd_].US[7] = Rs.US[7]; } -__forceinline void _PADDUB(int n) +__fi void _PADDUB(int n) { u16 Temp16; Temp16 = (u16)cpuRegs.GPR.r[_Rs_].UC[n] + (u16)cpuRegs.GPR.r[_Rt_].UC[n]; @@ -971,7 +971,7 @@ void PADDUB() { _PADDUB(i); } -__forceinline void _PSUBUB(int n) { +__fi void _PSUBUB(int n) { s16 sTemp16; sTemp16 = (s16)cpuRegs.GPR.r[_Rs_].UC[n] - (s16)cpuRegs.GPR.r[_Rt_].UC[n]; @@ -1060,7 +1060,7 @@ void QFSRV() { // JayteeMaster: changed a bit to avoid screw up //*********MMI2 OPCODES*************************************** -__forceinline void _PMADDW(int dd, int ss) +static __fi void _PMADDW(int dd, int ss) { s64 temp = (s64)((s64)cpuRegs.LO.SL[ss] | 
((s64)cpuRegs.HI.SL[ss] << 32)) + ((s64)cpuRegs.GPR.r[_Rs_].SL[ss] * (s64)cpuRegs.GPR.r[_Rt_].SL[ss]); @@ -1094,7 +1094,7 @@ void PSRLVW() { (cpuRegs.GPR.r[_Rs_].UL[2] & 0x1F)); } -__forceinline void _PMSUBW(int dd, int ss) +__fi void _PMSUBW(int dd, int ss) { s64 temp = (s64)((s64)cpuRegs.LO.SL[ss] | ((s64)cpuRegs.HI.SL[ss] << 32)) - ((s64)cpuRegs.GPR.r[_Rs_].SL[ss] * (s64)cpuRegs.GPR.r[_Rt_].SL[ss]); @@ -1140,7 +1140,7 @@ void PINTH() { cpuRegs.GPR.r[_Rd_].US[7] = Rs.US[7]; } -__forceinline void _PMULTW(int dd, int ss) +__fi void _PMULTW(int dd, int ss) { s64 temp = (s64)cpuRegs.GPR.r[_Rs_].SL[ss] * (s64)cpuRegs.GPR.r[_Rt_].SL[ss]; @@ -1155,7 +1155,7 @@ void PMULTW() { _PMULTW(1, 2); } -__forceinline void _PDIVW(int dd, int ss) +__fi void _PDIVW(int dd, int ss) { if (cpuRegs.GPR.r[_Rs_].UL[ss] == 0x80000000 && cpuRegs.GPR.r[_Rt_].UL[ss] == 0xffffffff) { @@ -1229,7 +1229,7 @@ void PMADDH() { // JayteeMaster: changed a bit to avoid screw up } // JayteeMaster: changed a bit to avoid screw up -__forceinline void _PHMADH_LO(int dd, int n) +__fi void _PHMADH_LO(int dd, int n) { s32 firsttemp = (s32)cpuRegs.GPR.r[_Rs_].SS[n+1] * (s32)cpuRegs.GPR.r[_Rt_].SS[n+1]; s32 temp = firsttemp + (s32)cpuRegs.GPR.r[_Rs_].SS[n] * (s32)cpuRegs.GPR.r[_Rt_].SS[n]; @@ -1238,7 +1238,7 @@ __forceinline void _PHMADH_LO(int dd, int n) cpuRegs.LO.UL[dd+1] = firsttemp; } -__forceinline void _PHMADH_HI(int dd, int n) +__fi void _PHMADH_HI(int dd, int n) { s32 firsttemp = (s32)cpuRegs.GPR.r[_Rs_].SS[n+1] * (s32)cpuRegs.GPR.r[_Rt_].SS[n+1]; s32 temp = firsttemp + (s32)cpuRegs.GPR.r[_Rs_].SS[n] * (s32)cpuRegs.GPR.r[_Rt_].SS[n]; @@ -1314,7 +1314,7 @@ void PMSUBH() { // JayteeMaster: changed a bit to avoid screw up } // JayteeMaster: changed a bit to avoid screw up -__forceinline void _PHMSBH_LO(int dd, int n, int rdd) +static __fi void _PHMSBH_LO(int dd, int n, int rdd) { s32 firsttemp = (s32)cpuRegs.GPR.r[_Rs_].SS[n+1] * (s32)cpuRegs.GPR.r[_Rt_].SS[n+1]; s32 temp = firsttemp - 
(s32)cpuRegs.GPR.r[_Rs_].SS[n] * (s32)cpuRegs.GPR.r[_Rt_].SS[n]; @@ -1322,7 +1322,7 @@ __forceinline void _PHMSBH_LO(int dd, int n, int rdd) cpuRegs.LO.UL[dd] = temp; cpuRegs.LO.UL[dd+1] = ~firsttemp; } -__forceinline void _PHMSBH_HI(int dd, int n, int rdd) +static __fi void _PHMSBH_HI(int dd, int n, int rdd) { s32 firsttemp = (s32)cpuRegs.GPR.r[_Rs_].SS[n+1] * (s32)cpuRegs.GPR.r[_Rt_].SS[n+1]; s32 temp = firsttemp - (s32)cpuRegs.GPR.r[_Rs_].SS[n] * (s32)cpuRegs.GPR.r[_Rt_].SS[n]; @@ -1415,7 +1415,7 @@ void PMULTH() { // JayteeMaster: changed a bit to avoid screw up } } -__forceinline void _PDIVBW(int n) +__fi void _PDIVBW(int n) { if (cpuRegs.GPR.r[_Rs_].UL[n] == 0x80000000 && cpuRegs.GPR.r[_Rt_].US[0] == 0xffff) { @@ -1466,7 +1466,7 @@ void PROT3W() { //*************************MMI3 OPCODES************************ -__forceinline void _PMADDUW(int dd, int ss) +static __fi void _PMADDUW(int dd, int ss) { u64 tempu = (u64)((u64)cpuRegs.LO.UL[ss] | ((u64)cpuRegs.HI.UL[ss] << 32)) + \ ((u64)cpuRegs.GPR.r[_Rs_].UL[ss] * (u64)cpuRegs.GPR.r[_Rt_].UL[ss]); @@ -1517,7 +1517,7 @@ void PINTEH() { cpuRegs.GPR.r[_Rd_].US[7] = Rs.US[6]; } -__forceinline void _PMULTUW(int dd, int ss) +__fi void _PMULTUW(int dd, int ss) { u64 tempu = (u64)cpuRegs.GPR.r[_Rs_].UL[ss] * (u64)cpuRegs.GPR.r[_Rt_].UL[ss]; @@ -1533,7 +1533,7 @@ void PMULTUW() { _PMULTUW(1, 2); } -__forceinline void _PDIVUW(int dd, int ss) +__fi void _PDIVUW(int dd, int ss) { if (cpuRegs.GPR.r[_Rt_].UL[ss] != 0) { cpuRegs.LO.SD[dd] = (s32)(cpuRegs.GPR.r[_Rs_].UL[ss] / cpuRegs.GPR.r[_Rt_].UL[ss]); diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index 115e08c3c5..8a75defc72 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -737,7 +737,7 @@ void SysMtgsThread::PrepDataPacket( GIF_PATH pathidx, u32 size ) PrepDataPacket( (MTGS_RingCommand)pathidx, size ); } -__forceinline void SysMtgsThread::_FinishSimplePacket() +__fi void SysMtgsThread::_FinishSimplePacket() { uint future_writepos = (m_WritePos+1) & RingBufferMask; 
pxAssert( future_writepos != volatize(m_ReadPos) ); diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index a5bb2e097d..44a7c3fd83 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -469,7 +469,7 @@ static void __fastcall _ext_memWrite128(u32 mem, const mem128_t *value) typedef void __fastcall ClearFunc_t( u32 addr, u32 qwc ); template -static __forceinline void ClearVuFunc( u32 addr, u32 size ) +static __fi void ClearVuFunc( u32 addr, u32 size ) { if( vunum==0 ) CpuVU0->Clear(addr,size); @@ -928,7 +928,7 @@ void mmap_MarkCountedRamPage( u32 paddr ) // offset - offset of address relative to psM. // All recompiled blocks belonging to the page are cleared, and any new blocks recompiled // from code residing in this page will use manual protection. -static __forceinline void mmap_ClearCpuBlock( uint offset ) +static __fi void mmap_ClearCpuBlock( uint offset ) { int rampage = offset >> 12; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 7eaf2ec63f..dc21702716 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -246,7 +246,7 @@ const wxChar *const tbl_GamefixNames[] = L"OPHFlag" }; -const __forceinline wxChar* EnumToString( GamefixId id ) +const __fi wxChar* EnumToString( GamefixId id ) { return tbl_GamefixNames[id]; } diff --git a/pcsx2/R3000A.cpp b/pcsx2/R3000A.cpp index 6c25e29087..e492bf169e 100644 --- a/pcsx2/R3000A.cpp +++ b/pcsx2/R3000A.cpp @@ -108,7 +108,7 @@ void __fastcall psxException(u32 code, u32 bd) }*/ } -__forceinline void psxSetNextBranch( u32 startCycle, s32 delta ) +__fi void psxSetNextBranch( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. 
@@ -117,12 +117,12 @@ __forceinline void psxSetNextBranch( u32 startCycle, s32 delta ) g_psxNextBranchCycle = startCycle + delta; } -__forceinline void psxSetNextBranchDelta( s32 delta ) +__fi void psxSetNextBranchDelta( s32 delta ) { psxSetNextBranch( psxRegs.cycle, delta ); } -__forceinline int psxTestCycle( u32 startCycle, s32 delta ) +__fi int psxTestCycle( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't explode // if the startCycle is ahead of our current cpu cycle. @@ -130,7 +130,7 @@ __forceinline int psxTestCycle( u32 startCycle, s32 delta ) return (int)(psxRegs.cycle - startCycle) >= delta; } -__forceinline void PSX_INT( IopEventId n, s32 ecycle ) +__fi void PSX_INT( IopEventId n, s32 ecycle ) { // Generally speaking games shouldn't throw ints that haven't been cleared yet. // It's usually indicative of something amiss in our emulation, so uncomment this @@ -161,7 +161,7 @@ __forceinline void PSX_INT( IopEventId n, s32 ecycle ) } } -static __forceinline void IopTestEvent( IopEventId n, void (*callback)() ) +static __fi void IopTestEvent( IopEventId n, void (*callback)() ) { if( !(psxRegs.interrupt & (1 << n)) ) return; @@ -174,7 +174,7 @@ static __forceinline void IopTestEvent( IopEventId n, void (*callback)() ) psxSetNextBranch( psxRegs.sCycle[n], psxRegs.eCycle[n] ); } -static __forceinline void sifHackInterrupt() +static __fi void sifHackInterrupt() { // No reason -- just that sometimes the SIF fell asleep, and this wakes it up. 
@@ -186,7 +186,7 @@ static __forceinline void sifHackInterrupt() //PSX_INT( IopEvt_SIFhack, 128 ); } -static __forceinline void _psxTestInterrupts() +static __fi void _psxTestInterrupts() { IopTestEvent(IopEvt_SIF0, sif0Interrupt); // SIF0 IopTestEvent(IopEvt_SIF1, sif1Interrupt); // SIF1 @@ -211,7 +211,7 @@ static __forceinline void _psxTestInterrupts() } } -__releaseinline void psxBranchTest() +__ri void psxBranchTest() { if( psxTestCycle( psxNextsCounter, psxNextCounter ) ) { diff --git a/pcsx2/R3000AInterpreter.cpp b/pcsx2/R3000AInterpreter.cpp index 8552ed44a5..e65119d7f5 100644 --- a/pcsx2/R3000AInterpreter.cpp +++ b/pcsx2/R3000AInterpreter.cpp @@ -125,7 +125,7 @@ void psxJALR() /////////////////////////////////////////// // These macros are used to assemble the repassembler functions -static __forceinline void execI() +static __fi void execI() { psxRegs.code = iopMemRead32(psxRegs.pc); diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index 28eb4b5a13..f89ac8d769 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -90,7 +90,7 @@ void cpuReset() LastELF = L""; } -__releaseinline void cpuException(u32 code, u32 bd) +__ri void cpuException(u32 code, u32 bd) { bool errLevel2, checkStatus; u32 offset; @@ -201,7 +201,7 @@ void cpuTlbMissW(u32 addr, u32 bd) { cpuTlbMiss(addr, bd, EXC_CODE_TLBS); } -__forceinline void _cpuTestMissingINTC() { +__fi void _cpuTestMissingINTC() { if (cpuRegs.CP0.n.Status.val & 0x400 && psHu32(INTC_STAT) & psHu32(INTC_MASK)) { if ((cpuRegs.interrupt & (1 << 30)) == 0) { @@ -210,7 +210,7 @@ __forceinline void _cpuTestMissingINTC() { } } -__forceinline void _cpuTestMissingDMAC() { +__fi void _cpuTestMissingDMAC() { if (cpuRegs.CP0.n.Status.val & 0x800 && (psHu16(0xe012) & psHu16(0xe010) || psHu16(0xe010) & 0x8000)) { @@ -229,7 +229,7 @@ void cpuTestMissingHwInts() { } // sets a branch test to occur some time from an arbitrary starting point. 
-__forceinline void cpuSetNextBranch( u32 startCycle, s32 delta ) +__fi void cpuSetNextBranch( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. @@ -241,14 +241,14 @@ __forceinline void cpuSetNextBranch( u32 startCycle, s32 delta ) } // sets a branch to occur some time from the current cycle -__forceinline void cpuSetNextBranchDelta( s32 delta ) +__fi void cpuSetNextBranchDelta( s32 delta ) { cpuSetNextBranch( cpuRegs.cycle, delta ); } // tests the cpu cycle agaisnt the given start and delta values. // Returns true if the delta time has passed. -__forceinline int cpuTestCycle( u32 startCycle, s32 delta ) +__fi int cpuTestCycle( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't explode // if the startCycle is ahead of our current cpu cycle. @@ -257,18 +257,18 @@ __forceinline int cpuTestCycle( u32 startCycle, s32 delta ) } // tells the EE to run the branch test the next time it gets a chance. 
-__forceinline void cpuSetBranch() +__fi void cpuSetBranch() { g_nextBranchCycle = cpuRegs.cycle; } -__forceinline void cpuClearInt( uint i ) +__fi void cpuClearInt( uint i ) { jASSUME( i < 32 ); cpuRegs.interrupt &= ~(1 << i); } -static __forceinline void TESTINT( u8 n, void (*callback)() ) +static __fi void TESTINT( u8 n, void (*callback)() ) { if( !(cpuRegs.interrupt & (1 << n)) ) return; @@ -281,7 +281,7 @@ static __forceinline void TESTINT( u8 n, void (*callback)() ) cpuSetNextBranch( cpuRegs.sCycle[n], cpuRegs.eCycle[n] ); } -static __forceinline void _cpuTestInterrupts() +static __fi void _cpuTestInterrupts() { if (!dmacRegs->ctrl.DMAE || psHu8(DMAC_ENABLER+2) == 1) { @@ -315,7 +315,7 @@ static __forceinline void _cpuTestInterrupts() } } -static __forceinline void _cpuTestTIMR() +static __fi void _cpuTestTIMR() { cpuRegs.CP0.n.Count += cpuRegs.cycle-s_iLastCOP0Cycle; s_iLastCOP0Cycle = cpuRegs.cycle; @@ -333,7 +333,7 @@ static __forceinline void _cpuTestTIMR() } } -static __forceinline void _cpuTestPERF() +static __fi void _cpuTestPERF() { // Perfs are updated when read by games (COP0's MFC0/MTC0 instructions), so we need // only update them at semi-regular intervals to keep cpuRegs.cycle from wrapping @@ -361,7 +361,7 @@ u32 g_nextBranchCycle = 0; // Shared portion of the branch test, called from both the Interpreter // and the recompiler. 
(moved here to help alleviate redundant code) -__forceinline void _cpuBranchTest_Shared() +__fi void _cpuBranchTest_Shared() { ScopedBool etest(eeEventTestIsActive); g_nextBranchCycle = cpuRegs.cycle + eeWaitCycles; @@ -481,7 +481,7 @@ __forceinline void _cpuBranchTest_Shared() if( cpuIntsEnabled(0x800) ) TESTINT(31, dmacInterrupt); } -__releaseinline void cpuTestINTCInts() +__ri void cpuTestINTCInts() { // Check the internal Event System -- if one's already scheduled then don't bother: if( cpuRegs.interrupt & (1 << 30) ) return; @@ -507,7 +507,7 @@ __releaseinline void cpuTestINTCInts() } } -__forceinline void cpuTestDMACInts() +__fi void cpuTestDMACInts() { // Check the internal Event System -- if one's already scheduled then don't bother: if ( cpuRegs.interrupt & (1 << 31) ) return; @@ -534,20 +534,20 @@ __forceinline void cpuTestDMACInts() } } -__forceinline void cpuTestTIMRInts() { +__fi void cpuTestTIMRInts() { if ((cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001) { _cpuTestPERF(); _cpuTestTIMR(); } } -__forceinline void cpuTestHwInts() { +__fi void cpuTestHwInts() { cpuTestINTCInts(); cpuTestDMACInts(); cpuTestTIMRInts(); } -__forceinline void CPU_INT( EE_EventType n, s32 ecycle) +__fi void CPU_INT( EE_EventType n, s32 ecycle) { if( n != 2 && cpuRegs.interrupt & (1< Twice-thrown int on IRQ %d", n ); diff --git a/pcsx2/R5900OpcodeImpl.cpp b/pcsx2/R5900OpcodeImpl.cpp index aef661f33b..a54f503d5b 100644 --- a/pcsx2/R5900OpcodeImpl.cpp +++ b/pcsx2/R5900OpcodeImpl.cpp @@ -24,7 +24,7 @@ #include "R5900Exceptions.h" -static __forceinline s64 _add64_Overflow( s64 x, s64 y ) +static __fi s64 _add64_Overflow( s64 x, s64 y ) { const s64 result = x + y; @@ -43,7 +43,7 @@ static __forceinline s64 _add64_Overflow( s64 x, s64 y ) return result; } -static __forceinline s64 _add32_Overflow( s32 x, s32 y ) +static __fi s64 _add32_Overflow( s32 x, s32 y ) { GPR_reg64 result; result.SD[0] = (s64)x + y; diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index 0cb674c71f..7eeef1a0f9 
100644 --- a/pcsx2/SPR.cpp +++ b/pcsx2/SPR.cpp @@ -84,7 +84,7 @@ int _SPR0chain() return (spr0->qwc); // bus is 1/2 the ee speed } -__forceinline void SPR0chain() +__fi void SPR0chain() { CPU_INT(DMAC_FROM_SPR, _SPR0chain() / BIAS); spr0->qwc = 0; @@ -132,7 +132,7 @@ void _SPR0interleave() spr0->qwc = 0; } -static __forceinline void _dmaSPR0() +static __fi void _dmaSPR0() { if (dmacRegs->ctrl.STS == STS_fromSPR) { @@ -273,7 +273,7 @@ void dmaSPR0() // fromSPR SPRFROMinterrupt(); } -__forceinline static void SPR1transfer(const void* data, int qwc) +__fi static void SPR1transfer(const void* data, int qwc) { memcpy_qwc(&psSu128(spr1->sadr), data, qwc); spr1->sadr += qwc * 16; @@ -294,7 +294,7 @@ int _SPR1chain() return (spr1->qwc); } -__forceinline void SPR1chain() +__fi void SPR1chain() { CPU_INT(DMAC_TO_SPR, _SPR1chain() / BIAS); spr1->qwc = 0; diff --git a/pcsx2/Sif.cpp b/pcsx2/Sif.cpp index f948323516..a8c580e242 100644 --- a/pcsx2/Sif.cpp +++ b/pcsx2/Sif.cpp @@ -26,7 +26,7 @@ void sifInit() memzero(sif1); } -__forceinline void dmaSIF2() +__fi void dmaSIF2() { SIF_LOG(wxString(L"dmaSIF2" + sif2dma->cmq_to_str()).To8BitData()); diff --git a/pcsx2/Sif0.cpp b/pcsx2/Sif0.cpp index 4a551ef647..5cb93d78e7 100644 --- a/pcsx2/Sif0.cpp +++ b/pcsx2/Sif0.cpp @@ -24,7 +24,7 @@ _sif sif0; static bool done = false; -static __forceinline void Sif0Init() +static __fi void Sif0Init() { SIF_LOG("SIF0 DMA start..."); done = false; @@ -33,7 +33,7 @@ static __forceinline void Sif0Init() } // Write from Fifo to EE. -static __forceinline bool WriteFifoToEE() +static __fi bool WriteFifoToEE() { const int readSize = min((s32)sif0dma->qwc, sif0.fifo.size >> 2); @@ -62,7 +62,7 @@ static __forceinline bool WriteFifoToEE() } // Write IOP to Fifo. -static __forceinline bool WriteIOPtoFifo() +static __fi bool WriteIOPtoFifo() { // There's some data ready to transfer into the fifo.. 
const int writeSize = min(sif0.iop.counter, sif0.fifo.free()); @@ -80,7 +80,7 @@ static __forceinline bool WriteIOPtoFifo() } // Read Fifo into an ee tag, transfer it to sif0dma, and process it. -static __forceinline bool ProcessEETag() +static __fi bool ProcessEETag() { static __aligned16 u32 tag[4]; @@ -121,7 +121,7 @@ static __forceinline bool ProcessEETag() } // Read Fifo into an iop tag, and transfer it to hw_dma(9). And presumably process it. -static __forceinline bool ProcessIOPTag() +static __fi bool ProcessIOPTag() { // Process DMA tag at hw_dma(9).tadr sif0.iop.data = *(sifData *)iopPhysMem(hw_dma(9).tadr); @@ -141,7 +141,7 @@ static __forceinline bool ProcessIOPTag() } // Stop transferring ee, and signal an interrupt. -static __forceinline void EndEE() +static __fi void EndEE() { SIF_LOG("Sif0: End EE"); sif0.ee.end = false; @@ -156,7 +156,7 @@ static __forceinline void EndEE() } // Stop transferring iop, and signal an interrupt. -static __forceinline void EndIOP() +static __fi void EndIOP() { SIF_LOG("Sif0: End IOP"); sif0data = 0; @@ -175,7 +175,7 @@ static __forceinline void EndIOP() } // Handle the EE transfer. -static __forceinline void HandleEETransfer() +static __fi void HandleEETransfer() { if(sif0dma->chcr.STR == false) { @@ -253,7 +253,7 @@ static __forceinline void HandleEETransfer() // SIF - 8 = 0 (pos=12) // SIF0 DMA end... -static __forceinline void HandleIOPTransfer() +static __fi void HandleIOPTransfer() { if (sif0.iop.counter <= 0) // If there's no more to transfer { @@ -280,13 +280,13 @@ static __forceinline void HandleIOPTransfer() } } -static __forceinline void Sif0End() +static __fi void Sif0End() { SIF_LOG("SIF0 DMA end..."); } // Transfer IOP to EE, putting data in the fifo as an intermediate step. 
-__forceinline void SIF0Dma() +__fi void SIF0Dma() { int BusyCheck = 0; Sif0Init(); @@ -317,19 +317,19 @@ __forceinline void SIF0Dma() Sif0End(); } -__forceinline void sif0Interrupt() +__fi void sif0Interrupt() { HW_DMA9_CHCR &= ~0x01000000; psxDmaInterrupt2(2); } -__forceinline void EEsif0Interrupt() +__fi void EEsif0Interrupt() { hwDmacIrq(DMAC_SIF0); sif0dma->chcr.STR = false; } -__forceinline void dmaSIF0() +__fi void dmaSIF0() { SIF_LOG(wxString(L"dmaSIF0" + sif0dma->cmqt_to_str()).To8BitData()); diff --git a/pcsx2/Sif1.cpp b/pcsx2/Sif1.cpp index c3c039ae0f..99f60d74d7 100644 --- a/pcsx2/Sif1.cpp +++ b/pcsx2/Sif1.cpp @@ -24,7 +24,7 @@ _sif sif1; static bool done = false; -static __forceinline void Sif1Init() +static __fi void Sif1Init() { SIF_LOG("SIF1 DMA start..."); done = false; @@ -33,7 +33,7 @@ static __forceinline void Sif1Init() } // Write from the EE to Fifo. -static __forceinline bool WriteEEtoFifo() +static __fi bool WriteEEtoFifo() { // There's some data ready to transfer into the fifo.. @@ -59,7 +59,7 @@ static __forceinline bool WriteEEtoFifo() } // Read from the fifo and write to IOP -static __forceinline bool WriteFifoToIOP() +static __fi bool WriteFifoToIOP() { // If we're reading something, continue to do so. @@ -78,7 +78,7 @@ static __forceinline bool WriteFifoToIOP() } // Get a tag and process it. -static __forceinline bool ProcessEETag() +static __fi bool ProcessEETag() { // Chain mode tDMA_TAG *ptag; @@ -142,7 +142,7 @@ static __forceinline bool ProcessEETag() } // Write fifo to data, and put it in IOP. -static __forceinline bool SIFIOPReadTag() +static __fi bool SIFIOPReadTag() { // Read a tag. sif1.fifo.read((u32*)&sif1.iop.data, 4); @@ -160,7 +160,7 @@ static __forceinline bool SIFIOPReadTag() } // Stop processing EE, and signal an interrupt. 
-static __forceinline void EndEE() +static __fi void EndEE() { sif1.ee.end = false; sif1.ee.busy = false; @@ -180,7 +180,7 @@ static __forceinline void EndEE() } // Stop processing IOP, and signal an interrupt. -static __forceinline void EndIOP() +static __fi void EndIOP() { sif1data = 0; sif1.iop.end = false; @@ -201,7 +201,7 @@ static __forceinline void EndIOP() } // Handle the EE transfer. -static __forceinline void HandleEETransfer() +static __fi void HandleEETransfer() { if(sif1dma->chcr.STR == false) { @@ -248,7 +248,7 @@ static __forceinline void HandleEETransfer() } // Handle the IOP transfer. -static __forceinline void HandleIOPTransfer() +static __fi void HandleIOPTransfer() { if (sif1.iop.counter > 0) { @@ -274,13 +274,13 @@ static __forceinline void HandleIOPTransfer() } } -static __forceinline void Sif1End() +static __fi void Sif1End() { SIF_LOG("SIF1 DMA end..."); } // Transfer EE to IOP, putting data in the fifo as an intermediate step. -__forceinline void SIF1Dma() +__fi void SIF1Dma() { int BusyCheck = 0; Sif1Init(); @@ -313,13 +313,13 @@ __forceinline void SIF1Dma() Sif1End(); } -__forceinline void sif1Interrupt() +__fi void sif1Interrupt() { HW_DMA10_CHCR &= ~0x01000000; //reset TR flag psxDmaInterrupt2(3); } -__forceinline void EEsif1Interrupt() +__fi void EEsif1Interrupt() { hwDmacIrq(DMAC_SIF1); sif1dma->chcr.STR = false; @@ -327,7 +327,7 @@ __forceinline void EEsif1Interrupt() // Do almost exactly the same thing as psxDma10 in IopDma.cpp. // Main difference is this checks for iop, where psxDma10 checks for ee. 
-__forceinline void dmaSIF1() +__fi void dmaSIF1() { SIF_LOG(wxString(L"dmaSIF1" + sif1dma->cmqt_to_str()).To8BitData()); diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index 17f6294fbc..1bf3b3e7f2 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -36,14 +36,14 @@ static int m_ForceEjectionTimeout[2]; #ifdef SIO_INLINE_IRQS #define SIO_INT() sioInterrupt() -#define SIO_FORCEINLINE __forceinline +#define SIO_FORCEINLINE __fi #else -__forceinline void SIO_INT() +__fi void SIO_INT() { if( !(psxRegs.interrupt & (1<statusflag|= (Flag_D | (VU0.VI[REG_STATUS_FLAG].US[0] >> 5)) << 11; } -static __releaseinline u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) { u32 v = *(u32*)&f; int exp = (v >> 23) & 0xff; @@ -72,47 +72,47 @@ static __releaseinline u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) } } -__forceinline u32 VU_MACx_UPDATE(VURegs * VU, float x) +__fi u32 VU_MACx_UPDATE(VURegs * VU, float x) { return VU_MAC_UPDATE(3, VU, x); } -__forceinline u32 VU_MACy_UPDATE(VURegs * VU, float y) +__fi u32 VU_MACy_UPDATE(VURegs * VU, float y) { return VU_MAC_UPDATE(2, VU, y); } -__forceinline u32 VU_MACz_UPDATE(VURegs * VU, float z) +__fi u32 VU_MACz_UPDATE(VURegs * VU, float z) { return VU_MAC_UPDATE(1, VU, z); } -__forceinline u32 VU_MACw_UPDATE(VURegs * VU, float w) +__fi u32 VU_MACw_UPDATE(VURegs * VU, float w) { return VU_MAC_UPDATE(0, VU, w); } -__forceinline void VU_MACx_CLEAR(VURegs * VU) +__fi void VU_MACx_CLEAR(VURegs * VU) { VU->macflag&= ~(0x1111<<3); } -__forceinline void VU_MACy_CLEAR(VURegs * VU) +__fi void VU_MACy_CLEAR(VURegs * VU) { VU->macflag&= ~(0x1111<<2); } -__forceinline void VU_MACz_CLEAR(VURegs * VU) +__fi void VU_MACz_CLEAR(VURegs * VU) { VU->macflag&= ~(0x1111<<1); } -__forceinline void VU_MACw_CLEAR(VURegs * VU) +__fi void VU_MACw_CLEAR(VURegs * VU) { VU->macflag&= ~(0x1111<<0); } -__releaseinline void VU_STAT_UPDATE(VURegs * VU) { +__ri void VU_STAT_UPDATE(VURegs * VU) { int 
newflag = 0 ; if (VU->macflag & 0x000F) newflag = 0x1; if (VU->macflag & 0x00F0) newflag |= 0x2; diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 0172fd1d9f..f3fc402fc6 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -45,7 +45,7 @@ static __aligned16 VECTOR RDzero; -static __releaseinline void __fastcall _vuFMACflush(VURegs * VU) { +static __ri void _vuFMACflush(VURegs * VU) { int i; for (i=0; i<8; i++) { @@ -62,7 +62,7 @@ static __releaseinline void __fastcall _vuFMACflush(VURegs * VU) { } } -static __releaseinline void __fastcall _vuFDIVflush(VURegs * VU) { +static __ri void _vuFDIVflush(VURegs * VU) { if (VU->fdiv.enable == 0) return; if ((VU->cycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { @@ -74,7 +74,7 @@ static __releaseinline void __fastcall _vuFDIVflush(VURegs * VU) { } } -static __releaseinline void __fastcall _vuEFUflush(VURegs * VU) { +static __ri void _vuEFUflush(VURegs * VU) { if (VU->efu.enable == 0) return; if ((VU->cycle - VU->efu.sCycle) >= VU->efu.Cycle) { @@ -140,7 +140,7 @@ void _vuFlushAll(VURegs* VU) } while(nRepeat); } -__forceinline void _vuTestPipes(VURegs * VU) { +__fi void _vuTestPipes(VURegs * VU) { _vuFMACflush(VU); _vuFDIVflush(VU); _vuEFUflush(VU); @@ -169,7 +169,7 @@ static void __fastcall _vuFMACTestStall(VURegs * VU, int reg, int xyzw) { _vuTestPipes(VU); } -static __releaseinline void __fastcall _vuFMACAdd(VURegs * VU, int reg, int xyzw) { +static __ri void __fastcall _vuFMACAdd(VURegs * VU, int reg, int xyzw) { int i; /* find a free fmac pipe */ @@ -192,7 +192,7 @@ static __releaseinline void __fastcall _vuFMACAdd(VURegs * VU, int reg, int xyzw VU->fmac[i].clipflag = VU->clipflag; } -static __releaseinline void __fastcall _vuFDIVAdd(VURegs * VU, int cycles) { +static __ri void __fastcall _vuFDIVAdd(VURegs * VU, int cycles) { VUM_LOG("adding FDIV pipe"); VU->fdiv.enable = 1; @@ -202,7 +202,7 @@ static __releaseinline void __fastcall _vuFDIVAdd(VURegs * VU, int cycles) { VU->fdiv.statusflag = VU->statusflag; } -static 
__releaseinline void __fastcall _vuEFUAdd(VURegs * VU, int cycles) { +static __ri void __fastcall _vuEFUAdd(VURegs * VU, int cycles) { // VUM_LOG("adding EFU pipe\n"); VU->efu.enable = 1; @@ -211,7 +211,7 @@ static __releaseinline void __fastcall _vuEFUAdd(VURegs * VU, int cycles) { VU->efu.reg.F = VU->p.F; } -static __releaseinline void __fastcall _vuFlushFDIV(VURegs * VU) { +static __ri void __fastcall _vuFlushFDIV(VURegs * VU) { int cycle; if (VU->fdiv.enable == 0) return; @@ -225,7 +225,7 @@ static __releaseinline void __fastcall _vuFlushFDIV(VURegs * VU) { VU->VI[REG_STATUS_FLAG].UL = VU->fdiv.statusflag; } -static __releaseinline void __fastcall _vuFlushEFU(VURegs * VU) { +static __ri void __fastcall _vuFlushEFU(VURegs * VU) { int cycle; if (VU->efu.enable == 0) return; @@ -238,7 +238,7 @@ static __releaseinline void __fastcall _vuFlushEFU(VURegs * VU) { VU->VI[REG_P].UL = VU->efu.reg.UL; } -__forceinline void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { +static __fi void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { if (VUregsn->VFread0) { _vuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw); } @@ -247,7 +247,7 @@ __forceinline void _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { } } -__forceinline void _vuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { +static __fi void _vuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { if (VUregsn->VFwrite) { _vuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw); } else @@ -258,36 +258,36 @@ __forceinline void _vuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { } } -__forceinline void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { +static __fi void _vuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { // _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); _vuFlushFDIV(VU); } -__forceinline void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { +static __fi void _vuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { if (VUregsn->VIwrite & (1 << REG_Q)) { _vuFDIVAdd(VU, 
VUregsn->cycles); } } -__forceinline void _vuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { +static __fi void _vuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { // _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); _vuFlushEFU(VU); } -__forceinline void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { +static __fi void _vuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { if (VUregsn->VIwrite & (1 << REG_P)) { _vuEFUAdd(VU, VUregsn->cycles); } } -__forceinline void _vuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { +__fi void _vuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { switch (VUregsn->pipe) { case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break; } } -__forceinline void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { +__fi void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { switch (VUregsn->pipe) { case VUPIPE_FMAC: _vuTestFMACStalls(VU, VUregsn); break; case VUPIPE_FDIV: _vuTestFDIVStalls(VU, VUregsn); break; @@ -295,13 +295,13 @@ __forceinline void _vuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { } } -__forceinline void _vuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { +__fi void _vuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { switch (VUregsn->pipe) { case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn); break; } } -__forceinline void _vuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { +__fi void _vuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { switch (VUregsn->pipe) { case VUPIPE_FMAC: _vuAddFMACStalls(VU, VUregsn); break; case VUPIPE_FDIV: _vuAddFDIVStalls(VU, VUregsn); break; @@ -332,7 +332,7 @@ static float __fastcall vuDouble(u32 f) return *(float*)&f; } #else -static __forceinline float vuDouble(u32 f) +static __fi float vuDouble(u32 f) { return *(float*)&f; } @@ -1577,7 +1577,7 @@ void _vuMR32(VURegs * VU) { // Load / Store Instructions (VU Interpreter) // -------------------------------------------------------------------------------------- -__forceinline u32* GET_VU_MEM(VURegs* VU, u32 addr) // 
non-static, also used by sVU for now. +__fi u32* GET_VU_MEM(VURegs* VU, u32 addr) // non-static, also used by sVU for now. { if( VU == g_pVU1 ) return (u32*)(VU1.Mem+(addr&0x3fff)); if( addr >= 0x4000 ) return (u32*)(VU0.Mem+(addr&0x43f0)); // get VF and VI regs (they're mapped to 0x4xx0 in VU0 mem!) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 5411256720..a17dd47190 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -86,7 +86,7 @@ void SaveStateBase::vif1Freeze() extern bool _chainVIF0(); extern bool _VIF0chain(); -_f void vif0FBRST(u32 value) { +__fi void vif0FBRST(u32 value) { VIF_LOG("VIF0_FBRST write32 0x%8.8x", value); if (value & 0x1) // Reset Vif. @@ -147,7 +147,7 @@ _f void vif0FBRST(u32 value) { } } -_f void vif1FBRST(u32 value) { +__fi void vif1FBRST(u32 value) { VIF_LOG("VIF1_FBRST write32 0x%8.8x", value); if (FBRST(value).RST) // Reset Vif. @@ -241,7 +241,7 @@ _f void vif1FBRST(u32 value) { } } -_f void vif1STAT(u32 value) { +__fi void vif1STAT(u32 value) { VIF_LOG("VIF1_STAT write32 0x%8.8x", value); /* Only FDR bit is writable, so mask the rest */ diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index b16c669a1d..7fd900e1fe 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -223,8 +223,6 @@ extern VIFregisters *vifRegs; #define GetVifX (idx ? (vif1) : (vif0)) #define vifXch (idx ? (vif1ch) : (vif0ch)) #define vifXRegs (idx ? (vif1Regs) : (vif0Regs)) -#define _f __forceinline -#define _ri __releaseinline extern void dmaVIF0(); extern void dmaVIF1(); diff --git a/pcsx2/Vif0_Dma.cpp b/pcsx2/Vif0_Dma.cpp index 7e76559790..2adfadded2 100644 --- a/pcsx2/Vif0_Dma.cpp +++ b/pcsx2/Vif0_Dma.cpp @@ -21,7 +21,7 @@ // Run VU0 until finish, don't add cycles to EE // because its vif stalling not the EE core... 
-__forceinline void vif0FLUSH() +__fi void vif0FLUSH() { if(g_packetsizeonvu > vif0.vifpacketsize && g_vu0Cycles > 0) { @@ -78,7 +78,7 @@ bool _VIF0chain() return VIF0transfer(pMem, vif0ch->qwc * 4); } -__forceinline void vif0SetupTransfer() +__fi void vif0SetupTransfer() { tDMA_TAG *ptag; @@ -138,7 +138,7 @@ __forceinline void vif0SetupTransfer() } } -__forceinline void vif0Interrupt() +__fi void vif0Interrupt() { VIF_LOG("vif0Interrupt: %8.8x", cpuRegs.cycle); diff --git a/pcsx2/Vif1_Dma.cpp b/pcsx2/Vif1_Dma.cpp index d4b33e23b4..b8538cffea 100644 --- a/pcsx2/Vif1_Dma.cpp +++ b/pcsx2/Vif1_Dma.cpp @@ -22,7 +22,7 @@ #include "newVif.h" -__forceinline void vif1FLUSH() +__fi void vif1FLUSH() { if(g_packetsizeonvu > vif1.vifpacketsize && g_vu1Cycles > 0) { @@ -180,7 +180,7 @@ bool _VIF1chain() return VIF1transfer(pMem, vif1ch->qwc * 4); } -__forceinline void vif1SetupTransfer() +__fi void vif1SetupTransfer() { tDMA_TAG *ptag; DMACh& vif1c = (DMACh&)PS2MEM_HW[0x9000]; @@ -340,7 +340,7 @@ bool CheckPath2GIF(EE_EventType channel) } return true; } -__forceinline void vif1Interrupt() +__fi void vif1Interrupt() { VIF_LOG("vif1Interrupt: %8.8x", cpuRegs.cycle); diff --git a/pcsx2/Vif1_MFIFO.cpp b/pcsx2/Vif1_MFIFO.cpp index ee2f9d29ac..012d01699f 100644 --- a/pcsx2/Vif1_MFIFO.cpp +++ b/pcsx2/Vif1_MFIFO.cpp @@ -36,7 +36,7 @@ static u32 qwctag(u32 mask) return (dmacRegs->rbor.ADDR + (mask & dmacRegs->rbsr.RMSK)); } -static __forceinline bool mfifoVIF1rbTransfer() +static __fi bool mfifoVIF1rbTransfer() { u32 maddr = dmacRegs->rbor.ADDR; u32 msize = dmacRegs->rbor.ADDR + dmacRegs->rbsr.RMSK + 16; @@ -93,7 +93,7 @@ static __forceinline bool mfifoVIF1rbTransfer() return ret; } -static __forceinline bool mfifo_VIF1chain() +static __fi bool mfifo_VIF1chain() { bool ret; diff --git a/pcsx2/Vif_Codes.cpp b/pcsx2/Vif_Codes.cpp index 5f3c1798fa..6af062a435 100644 --- a/pcsx2/Vif_Codes.cpp +++ b/pcsx2/Vif_Codes.cpp @@ -32,12 +32,12 @@ vifOp(vifCode_Null); // Vif0/Vif1 Misc Functions 
//------------------------------------------------------------------ -static _f void vifFlush(int idx) { +static __fi void vifFlush(int idx) { if (!idx) vif0FLUSH(); else vif1FLUSH(); } -static _f void vuExecMicro(int idx, u32 addr) { +static __fi void vuExecMicro(int idx, u32 addr) { VURegs* VU = nVif[idx].VU; int startcycles = 0; //vifFlush(idx); @@ -116,7 +116,7 @@ vifOp(vifCode_Base) { extern bool SIGNAL_IMR_Pending; -template _f int _vifCode_Direct(int pass, const u8* data, bool isDirectHL) { +template __fi int _vifCode_Direct(int pass, const u8* data, bool isDirectHL) { pass1 { vif1Only(); int vifImm = (u16)vif1Regs->code; @@ -301,7 +301,7 @@ vifOp(vifCode_Mark) { return 0; } -static _f void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) { +static __fi void _vifCode_MPG(int idx, u32 addr, const u32 *data, int size) { VURegs& VUx = idx ? VU1 : VU0; pxAssume(VUx.Micro > 0); @@ -423,7 +423,7 @@ vifOp(vifCode_Offset) { return 0; } -template static _f int _vifCode_STColRow(const u32* data, u32* pmem1, u32* pmem2) { +template static __fi int _vifCode_STColRow(const u32* data, u32* pmem1, u32* pmem2) { vifStruct& vifX = GetVifX; int ret = min(4 - vifX.tag.addr, vifX.vifpacketsize); diff --git a/pcsx2/Vif_Dma.h b/pcsx2/Vif_Dma.h index d7cc369eda..2b66f7db7f 100644 --- a/pcsx2/Vif_Dma.h +++ b/pcsx2/Vif_Dma.h @@ -98,7 +98,7 @@ typedef FnType_VifCmdHandler* Fnptr_VifCmdHandler; extern const __aligned16 Fnptr_VifCmdHandler vifCmdHandler[2][128]; -__forceinline static int _limit(int a, int max) +__fi static int _limit(int a, int max) { return ((a > max) ? 
max : a); } diff --git a/pcsx2/Vif_Transfer.cpp b/pcsx2/Vif_Transfer.cpp index 771c98d909..c9a5b7e83a 100644 --- a/pcsx2/Vif_Transfer.cpp +++ b/pcsx2/Vif_Transfer.cpp @@ -101,7 +101,7 @@ _vifT void vifTransferLoop(u32* &data) { if (pSize) vifX.vifstalled = true; } -_vifT _f bool vifTransfer(u32 *data, int size) { +_vifT static __fi bool vifTransfer(u32 *data, int size) { vifStruct& vifX = GetVifX; // irqoffset necessary to add up the right qws, or else will spin (spiderman) diff --git a/pcsx2/Vif_Unpack.cpp b/pcsx2/Vif_Unpack.cpp index 39cb8e0640..ba125623c3 100644 --- a/pcsx2/Vif_Unpack.cpp +++ b/pcsx2/Vif_Unpack.cpp @@ -25,7 +25,7 @@ enum UnpackOffset { OFFSET_W = 3 }; -static __forceinline u32 setVifRowRegs(u32 reg, u32 data) { +static __fi u32 setVifRowRegs(u32 reg, u32 data) { switch (reg) { case 0: vifRegs->r0 = data; break; case 1: vifRegs->r1 = data; break; @@ -36,7 +36,7 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data) { return data; } -static __forceinline u32 getVifRowRegs(u32 reg) { +static __fi u32 getVifRowRegs(u32 reg) { switch (reg) { case 0: return vifRegs->r0; break; case 1: return vifRegs->r1; break; @@ -47,7 +47,7 @@ static __forceinline u32 getVifRowRegs(u32 reg) { return 0; // unreachable... 
} -static __forceinline u32 getVifColRegs(u32 reg) { +static __fi u32 getVifColRegs(u32 reg) { switch (reg) { case 0: return vifRegs->c0; break; case 1: return vifRegs->c1; break; @@ -58,7 +58,7 @@ static __forceinline u32 getVifColRegs(u32 reg) { } template< bool doMask > -static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { +static __ri void writeXYZW(u32 offnum, u32 &dest, u32 data) { u32 vifRowReg = getVifRowRegs(offnum); int n = 0; @@ -89,7 +89,7 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { } template < bool doMask, class T > -static __forceinline void __fastcall UNPACK_S(u32 *dest, const T *data, int size) +static __fi void __fastcall UNPACK_S(u32 *dest, const T *data, int size) { //S-# will always be a complete packet, no matter what. So we can skip the offset bits writeXYZW(OFFSET_X, *dest++, *data); @@ -99,7 +99,7 @@ static __forceinline void __fastcall UNPACK_S(u32 *dest, const T *data, int size } template -static __forceinline void __fastcall UNPACK_V2(u32 *dest, const T *data, int size) +static __ri void __fastcall UNPACK_V2(u32 *dest, const T *data, int size) { if (vifRegs->offset == OFFSET_X) { @@ -135,7 +135,7 @@ static __forceinline void __fastcall UNPACK_V2(u32 *dest, const T *data, int siz } template -static __forceinline void __fastcall UNPACK_V3(u32 *dest, const T *data, int size) +static __ri void __fastcall UNPACK_V3(u32 *dest, const T *data, int size) { if(vifRegs->offset == OFFSET_X) { @@ -177,7 +177,7 @@ static __forceinline void __fastcall UNPACK_V3(u32 *dest, const T *data, int siz } template -static __forceinline void __fastcall UNPACK_V4(u32 *dest, const T *data , int size) +static __fi void __fastcall UNPACK_V4(u32 *dest, const T *data , int size) { while (size > 0) { @@ -190,7 +190,7 @@ static __forceinline void __fastcall UNPACK_V4(u32 *dest, const T *data , int si } template< bool doMask > -static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, const u32 *data, int size) 
+static __ri void __fastcall UNPACK_V4_5(u32 *dest, const u32 *data, int size) { //As with S-#, this will always be a complete packet writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index a7dde49ca1..39a449470f 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -805,12 +805,12 @@ void Pcsx2App::CleanUp() _parent::CleanUp(); } -__forceinline wxString AddAppName( const wxChar* fmt ) +__fi wxString AddAppName( const wxChar* fmt ) { return wxsFormat( fmt, pxGetAppName().c_str() ); } -__forceinline wxString AddAppName( const char* fmt ) +__fi wxString AddAppName( const char* fmt ) { return wxsFormat( fromUTF8(fmt), pxGetAppName().c_str() ); } diff --git a/pcsx2/gui/AppMain.cpp b/pcsx2/gui/AppMain.cpp index f99455de65..31256cc928 100644 --- a/pcsx2/gui/AppMain.cpp +++ b/pcsx2/gui/AppMain.cpp @@ -886,7 +886,7 @@ void Pcsx2App::SysExecute( CDVD_SourceType cdvdsrc, const wxString& elf_override // Thread Safety: The state of the system can change in parallel to execution of the // main thread. If you need to perform an extended length activity on the execution // state (such as saving it), you *must* suspend the Corethread first! 
-__forceinline bool SysHasValidState() +__fi bool SysHasValidState() { return CoreThread.HasActiveMachine(); } diff --git a/pcsx2/ps2/GIFpath.cpp b/pcsx2/ps2/GIFpath.cpp index 4c736bf8aa..4ed7b20866 100644 --- a/pcsx2/ps2/GIFpath.cpp +++ b/pcsx2/ps2/GIFpath.cpp @@ -113,7 +113,7 @@ struct GifPathStruct const GIFRegHandler Handlers[0x100-0x60]; // handlers for 0x60->0x100 GIFPath path[3]; - __forceinline GIFPath& operator[]( int idx ) { return path[idx]; } + __fi GIFPath& operator[]( int idx ) { return path[idx]; } }; @@ -249,13 +249,13 @@ GIFPath::GIFPath() : tag() Reset(); } -__forceinline void GIFPath::Reset() +__fi void GIFPath::Reset() { memzero(*this); const_cast(tag).EOP = 1; } -__forceinline bool GIFPath::StepReg() +__fi bool GIFPath::StepReg() { if (++curreg >= numregs) { curreg = 0; @@ -266,13 +266,13 @@ __forceinline bool GIFPath::StepReg() return true; } -__forceinline u8 GIFPath::GetReg() { return regs[curreg]; } +__fi u8 GIFPath::GetReg() { return regs[curreg]; } // Unpack the registers - registers are stored as a sequence of 4 bit values in the // upper 64 bits of the GIFTAG. That sucks for us when handling partialized GIF packets // coming in from paths 2 and 3, so we unpack them into an 8 bit array here. // -__forceinline void GIFPath::PrepPackedRegs() +__fi void GIFPath::PrepPackedRegs() { // Only unpack registers if we're starting a new pack. Otherwise the unpacked // array should have already been initialized by a previous partial transfer. @@ -292,7 +292,7 @@ __forceinline void GIFPath::PrepPackedRegs() template< bool Aligned > -__forceinline void GIFPath::SetTag(const void* mem) +__fi void GIFPath::SetTag(const void* mem) { _mm_store_ps( (float*)&tag, Aligned ? 
_mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) ); @@ -300,7 +300,7 @@ __forceinline void GIFPath::SetTag(const void* mem) curreg = 0; } -__forceinline bool GIFPath::IsActive() const +__fi bool GIFPath::IsActive() const { return (nloop != 0) || !tag.EOP; } @@ -312,7 +312,7 @@ void SaveStateBase::gifPathFreeze() } -static __forceinline void gsHandler(const u8* pMem) +static __fi void gsHandler(const u8* pMem) { const int reg = pMem[8]; @@ -382,7 +382,7 @@ static __forceinline void gsHandler(const u8* pMem) // size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the // path does not terminate (EOP) within the specified size, it is assumed that the path must // loop around to the start of VU memory and continue processing. -__forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size) +__fi int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size) { u32 startSize = size; // Start Size @@ -529,7 +529,7 @@ __forceinline int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 s return size; } -__releaseinline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len ) +__ri void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& destStart, uint destSize, uint len ) { uint endpos = destStart + len; if( endpos < destSize ) @@ -547,7 +547,7 @@ __releaseinline void MemCopy_WrappedDest( const u128* src, u128* destBase, uint& } } -__releaseinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len ) +__ri void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, uint srcSize, u128* dest, uint len ) { uint endpos = srcStart + len; if( endpos < srcSize ) @@ -576,7 +576,7 @@ __releaseinline void MemCopy_WrappedSrc( const u128* srcBase, uint& srcStart, ui // path does not terminate (EOP) within the specified size, it is assumed that the path must // loop around to the start of VU memory and 
continue processing. template< GIF_PATH pathidx, bool Aligned > -__forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size) +__fi int GIFPath::CopyTag(const u128* pMem128, u32 size) { uint& ringpos = GetMTGS().m_packet_writepos; const uint original_ringpos = ringpos; @@ -874,7 +874,7 @@ __forceinline int GIFPath::CopyTag(const u128* pMem128, u32 size) // size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the // path does not terminate (EOP) within the specified size, it is assumed that the path must // loop around to the start of VU memory and continue processing. -__forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size) +__fi int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size) { switch( pathidx ) { @@ -900,7 +900,7 @@ __forceinline int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size) // Quick version for queuing PATH1 data. // This version calculates the real length of the packet data only. It does not process // IRQs or DMA status updates. -__forceinline int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size) +__fi int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size) { int retSize = s_gifPath[pathidx].ParseTagQuick(pathidx, pMem, size); return retSize; @@ -915,7 +915,7 @@ void GIFPath_Reset() // This is a hackfix tool provided for "canceling" the contents of the GIFpath when // invalid GIFdma states are encountered (typically needed for PATH3 only). 
-__forceinline void GIFPath_Clear( GIF_PATH pathidx ) +__fi void GIFPath_Clear( GIF_PATH pathidx ) { memzero(s_gifPath.path[pathidx]); s_gifPath.path[pathidx].Reset(); diff --git a/pcsx2/ps2/Iop/IopHwRead.cpp b/pcsx2/ps2/Iop/IopHwRead.cpp index c9a5874524..837defb524 100644 --- a/pcsx2/ps2/Iop/IopHwRead.cpp +++ b/pcsx2/ps2/Iop/IopHwRead.cpp @@ -114,7 +114,7 @@ mem8_t __fastcall iopHwRead8_Page8( u32 addr ) ////////////////////////////////////////////////////////////////////////////////////////// // template< typename T > -static __forceinline T _HwRead_16or32_Page1( u32 addr ) +static __fi T _HwRead_16or32_Page1( u32 addr ) { // all addresses are assumed to be prefixed with 0x1f801xxx: jASSUME( (addr >> 12) == 0x1f801 ); diff --git a/pcsx2/ps2/Iop/IopHwWrite.cpp b/pcsx2/ps2/Iop/IopHwWrite.cpp index 0bdc6ca22e..96adc0092b 100644 --- a/pcsx2/ps2/Iop/IopHwWrite.cpp +++ b/pcsx2/ps2/Iop/IopHwWrite.cpp @@ -30,7 +30,7 @@ using namespace Internal; ////////////////////////////////////////////////////////////////////////////////////////// // template< typename T > -static __forceinline void _generic_write( u32 addr, T val ) +static __fi void _generic_write( u32 addr, T val ) { //int bitsize = (sizeof(T) == 1) ? 8 : ( (sizeof(T) == 2) ? 16 : 32 ); IopHwTraceLog( addr, val, "Write" ); @@ -44,7 +44,7 @@ void __fastcall iopHwWrite32_generic( u32 addr, mem32_t val ) { _generic_write -static __forceinline T _generic_read( u32 addr ) +static __fi T _generic_read( u32 addr ) { //int bitsize = (sizeof(T) == 1) ? 8 : ( (sizeof(T) == 2) ? 16 : 32 ); @@ -157,7 +157,7 @@ void __fastcall iopHwWrite8_Page8( u32 addr, mem8_t val ) // Templated handler for both 32 and 16 bit write operations, to Page 1 registers. 
// template< typename T > -static __forceinline void _HwWrite_16or32_Page1( u32 addr, T val ) +static __fi void _HwWrite_16or32_Page1( u32 addr, T val ) { // all addresses are assumed to be prefixed with 0x1f801xxx: pxAssert( (addr >> 12) == 0x1f801 ); diff --git a/pcsx2/ps2/Iop/IopHw_Internal.h b/pcsx2/ps2/Iop/IopHw_Internal.h index a22a4db354..c5340c34f6 100644 --- a/pcsx2/ps2/Iop/IopHw_Internal.h +++ b/pcsx2/ps2/Iop/IopHw_Internal.h @@ -38,7 +38,7 @@ namespace Internal { // template< typename T> -static __releaseinline const char* _log_GetIopHwName( u32 addr, T val ) +static __ri const char* _log_GetIopHwName( u32 addr, T val ) { switch( addr ) { @@ -200,7 +200,7 @@ static __releaseinline const char* _log_GetIopHwName( u32 addr, T val ) } template< typename T> -static __releaseinline void IopHwTraceLog( u32 addr, T val, const char* modestr ) +static __ri void IopHwTraceLog( u32 addr, T val, const char* modestr ) { if( !EmuConfig.Trace.IOP.m_EnableRegisters ) return; diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index 7c2dbbf78c..d4a76b2c66 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -65,7 +65,7 @@ vtlbHandler UnmappedPhyHandler1; // Interpreted VTLB lookup for 8, 16, and 32 bit accesses template -__forceinline DataType __fastcall MemOp_r0(u32 addr) +__fi DataType __fastcall MemOp_r0(u32 addr) { u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; @@ -94,7 +94,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr) // ------------------------------------------------------------------------ // Interpreterd VTLB lookup for 64 and 128 bit accesses. 
template -__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) +__fi void MemOp_r1(u32 addr, DataType* data) { u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; @@ -125,7 +125,7 @@ __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) // ------------------------------------------------------------------------ template -__forceinline void __fastcall MemOp_w0(u32 addr, DataType data) +__fi void MemOp_w0(u32 addr, DataType data) { u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; s32 ppf=addr+vmv; @@ -153,7 +153,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data) // ------------------------------------------------------------------------ template -__forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data) +__fi void MemOp_w1(u32 addr,const DataType* data) { verify(DataSize==128 || DataSize==64); u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS]; @@ -230,7 +230,7 @@ void __fastcall vtlb_memWrite128(u32 mem, const mem128_t *value) // // Generates a tlbMiss Exception -static __forceinline void vtlb_Miss(u32 addr,u32 mode) +static __ri void vtlb_Miss(u32 addr,u32 mode) { if( IsDevBuild ) Cpu->ThrowCpuException( R5900Exception::TLBMiss( addr, !!mode ) ); @@ -241,7 +241,7 @@ static __forceinline void vtlb_Miss(u32 addr,u32 mode) // BusError exception: more serious than a TLB miss. If properly emulated the PS2 kernel // itself would invoke a diagnostic/assertion screen that displays the cpu state at the // time of the exception. -static __forceinline void vtlb_BusError(u32 addr,u32 mode) +static __ri void vtlb_BusError(u32 addr,u32 mode) { // Throwing exceptions isn't reliable *yet* because memory ops don't flush // the PC prior to invoking the indirect handlers. 
@@ -297,17 +297,17 @@ template void __fastcall vtlbUnmappedPWrite128(u32 addr,const mem128_t* data) { vtlb_BusError(addr|saddr,1); } ///// VTLB mapping errors (unmapped address spaces) -mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { Console.Error("vtlbDefaultPhyRead8: 0x%X",addr); verify(false); return -1; } -mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { Console.Error("vtlbDefaultPhyRead16: 0x%X",addr); verify(false); return -1; } -mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { Console.Error("vtlbDefaultPhyRead32: 0x%X",addr); verify(false); return -1; } -void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { Console.Error("vtlbDefaultPhyRead64: 0x%X",addr); verify(false); } -void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { Console.Error("vtlbDefaultPhyRead128: 0x%X",addr); verify(false); } +static mem8_t __fastcall vtlbDefaultPhyRead8(u32 addr) { Console.Error("vtlbDefaultPhyRead8: 0x%X",addr); verify(false); return -1; } +static mem16_t __fastcall vtlbDefaultPhyRead16(u32 addr) { Console.Error("vtlbDefaultPhyRead16: 0x%X",addr); verify(false); return -1; } +static mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr) { Console.Error("vtlbDefaultPhyRead32: 0x%X",addr); verify(false); return -1; } +static void __fastcall vtlbDefaultPhyRead64(u32 addr,mem64_t* data) { Console.Error("vtlbDefaultPhyRead64: 0x%X",addr); verify(false); } +static void __fastcall vtlbDefaultPhyRead128(u32 addr,mem128_t* data) { Console.Error("vtlbDefaultPhyRead128: 0x%X",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { Console.Error("vtlbDefaultPhyWrite8: 0x%X",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { Console.Error("vtlbDefaultPhyWrite16: 0x%X",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { Console.Error("vtlbDefaultPhyWrite32: 0x%X",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { 
Console.Error("vtlbDefaultPhyWrite64: 0x%X",addr); verify(false); } -void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console.Error("vtlbDefaultPhyWrite128: 0x%X",addr); verify(false); } +static void __fastcall vtlbDefaultPhyWrite8(u32 addr,mem8_t data) { Console.Error("vtlbDefaultPhyWrite8: 0x%X",addr); verify(false); } +static void __fastcall vtlbDefaultPhyWrite16(u32 addr,mem16_t data) { Console.Error("vtlbDefaultPhyWrite16: 0x%X",addr); verify(false); } +static void __fastcall vtlbDefaultPhyWrite32(u32 addr,mem32_t data) { Console.Error("vtlbDefaultPhyWrite32: 0x%X",addr); verify(false); } +static void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console.Error("vtlbDefaultPhyWrite64: 0x%X",addr); verify(false); } +static void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console.Error("vtlbDefaultPhyWrite128: 0x%X",addr); verify(false); } // =========================================================================================== @@ -436,7 +436,7 @@ void vtlb_Mirror(u32 new_region,u32 start,u32 size) } } -__forceinline void* vtlb_GetPhyPtr(u32 paddr) +__fi void* vtlb_GetPhyPtr(u32 paddr) { if (paddr>=VTLB_PMAP_SZ || vtlbdata.pmap[paddr>>VTLB_PAGE_BITS]<0) return NULL; diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 172cba2e6b..502242766d 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -78,7 +78,7 @@ public: int LastIndex (u32 startpc) const; BASEBLOCKEX* GetByX86(uptr ip); - __forceinline int Index (u32 startpc) const + __fi int Index (u32 startpc) const { int idx = LastIndex(startpc); // fixme: I changed the parenthesis to be unambiguous, but this needs to be checked to see if ((x or y or z) and w) @@ -91,19 +91,19 @@ public: return idx; } - __forceinline BASEBLOCKEX* operator[](int idx) + __fi BASEBLOCKEX* operator[](int idx) { if (idx < 0 || idx >= (int)blocks.size()) return 0; return &blocks[idx]; } - __forceinline BASEBLOCKEX* Get(u32 startpc) + 
__fi BASEBLOCKEX* Get(u32 startpc) { return (*this)[Index(startpc)]; } - __forceinline void Remove(int idx) + __fi void Remove(int idx) { //u32 startpc = blocks[idx].startpc; std::pair range = links.equal_range(blocks[idx].startpc); @@ -127,7 +127,7 @@ public: void Link(u32 pc, s32* jumpptr); - __forceinline void Reset() + __fi void Reset() { blocks.clear(); links.clear(); diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index b5ed0b4f94..607bd0a046 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -51,7 +51,7 @@ void _initXMMregs() { s_xmmchecknext = 0; } -__forceinline void* _XMMGetAddr(int type, int reg, VURegs *VU) +__fi void* _XMMGetAddr(int type, int reg, VURegs *VU) { switch (type) { case XMMTYPE_VFREG: diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index c934a65296..bf4d8bfbce 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -100,7 +100,7 @@ #define X86TYPE_VU1 0x80 //#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI) -static __forceinline int X86_ISVI(int type) +static __fi int X86_ISVI(int type) { return ((type&~X86TYPE_VU1) == X86TYPE_VI); } @@ -233,12 +233,12 @@ extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern void _recFillRegister(EEINST& pinst, int type, int reg, int write); -static __forceinline bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0)); } -static __forceinline bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE2)); } -static __forceinline bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE2); } +static __fi bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0)); } +static __fi bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE2)); } +static __fi bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & 
EEINST_LIVE2); } -static __forceinline bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } -static __forceinline bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } +static __fi bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } +static __fi bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } extern u32 g_recWriteback; // used for jumps (VUrec mess!) @@ -277,17 +277,17 @@ void SetFPUstate(); #define MMX_COP0 96 #define MMX_TEMP 0x7f -static __forceinline bool MMX_IS32BITS(s32 x) +static __fi bool MMX_IS32BITS(s32 x) { return (((x >= MMX_FPU) && (x < MMX_COP0 + 32)) || (x == MMX_FPUACC)); } -static __forceinline bool MMX_ISGPR(s32 x) +static __fi bool MMX_ISGPR(s32 x) { return ((x >= MMX_GPR) && (x < MMX_GPR + 34)); } -static __forceinline bool MMX_ISGPR(u32 x) +static __fi bool MMX_ISGPR(u32 x) { return (x < MMX_GPR + 34); } diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index e5f1662cf3..07a9a65dc3 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -340,7 +340,7 @@ REC_FPUFUNC(RSQRT_S); //------------------------------------------------------------------ static __aligned16 u64 FPU_FLOAT_TEMP[2]; -__forceinline void fpuFloat4(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax +__fi void fpuFloat4(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax int t1reg = _allocTempXMMreg(XMMT_FPS, -1); if (t1reg >= 0) { SSE_MOVSS_XMM_to_XMM(t1reg, regd); @@ -363,20 +363,20 @@ __forceinline void fpuFloat4(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf - } } -__forceinline void fpuFloat(int regd) { // +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax +__fi void fpuFloat(int regd) { // +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax if (CHECK_FPU_OVERFLOW) { SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); // MIN() must be before MAX()! 
So that NaN's become +Maximum SSE_MAXSS_M32_to_XMM(regd, (uptr)&g_minvals[0]); } } -__forceinline void fpuFloat2(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax +__fi void fpuFloat2(int regd) { // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax if (CHECK_FPU_OVERFLOW) { fpuFloat4(regd); } } -__forceinline void fpuFloat3(int regd) { +__fi void fpuFloat3(int regd) { // This clamp function is used in the recC_xx opcodes // Rule of Rose needs clamping or else it crashes (minss or maxss both fix the crash) // Tekken 5 has disappearing characters unless preserving NaN sign (fpuFloat4() preserves NaN sign). diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 38eb592502..87c547271f 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -901,7 +901,7 @@ static __noinline s32 recExecuteBlock( s32 eeCycles ) } // Returns the offset to the next instruction after any cleared memory -static __forceinline u32 psxRecClearMem(u32 pc) +static __fi u32 psxRecClearMem(u32 pc) { BASEBLOCK* pblock; @@ -948,7 +948,7 @@ static __forceinline u32 psxRecClearMem(u32 pc) return upperextent - pc; } -static __forceinline void recClearIOP(u32 Addr, u32 Size) +static __fi void recClearIOP(u32 Addr, u32 Size) { u32 pc = Addr; while (pc < Addr + Size*4) @@ -1008,7 +1008,7 @@ void psxSetBranchImm( u32 imm ) recBlocks.Link(HWADDR(imm), xJcc32()); } -static __forceinline u32 psxScaleBlockCycles() +static __fi u32 psxScaleBlockCycles() { return s_psxBlockCycles; } diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index f3d8330de6..e1bdeaafd1 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -466,7 +466,7 @@ void _initMMXregs() s_mmxchecknext = 0; } -__forceinline void* _MMXGetAddr(int reg) +__fi void* _MMXGetAddr(int reg) { pxAssert( reg != MMX_TEMP ); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index a5e8eca62c..f46ebb9c36 100644 --- 
a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -817,7 +817,7 @@ void R5900::Dynarec::OpcodeImpl::recBREAK( void ) } // Clears the recLUT table so that all blocks are mapped to the JIT recompiler by default. -static __releaseinline void ClearRecLUT(BASEBLOCK* base, int count) +static __ri void ClearRecLUT(BASEBLOCK* base, int count) { for (int i = 0; i < count; i++) base[i].SetFnptr((uptr)JITCompile); diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index b49abeae5c..6a9e0f6445 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -63,14 +63,14 @@ const __aligned(32) mVU_Globals mVUglob = { // Micro VU - Main Functions //------------------------------------------------------------------ -_f void mVUthrowHardwareDeficiency(const wxChar* extFail, int vuIndex) { +__fi void mVUthrowHardwareDeficiency(const wxChar* extFail, int vuIndex) { throw Exception::HardwareDeficiency() .SetDiagMsg(wxsFormat(L"microVU%d recompiler init failed: %s is not available.", vuIndex, extFail)) .SetUserMsg(wxsFormat(_("%s Extensions not found. microVU requires a host CPU with MMX, SSE, and SSE2 extensions."), extFail )); } // Only run this once per VU! 
;) -_f void mVUinit(VURegs* vuRegsPtr, int vuIndex) { +__fi void mVUinit(VURegs* vuRegsPtr, int vuIndex) { if(!x86caps.hasMultimediaExtensions) mVUthrowHardwareDeficiency( L"MMX", vuIndex ); if(!x86caps.hasStreamingSIMDExtensions) mVUthrowHardwareDeficiency( L"SSE", vuIndex ); @@ -110,7 +110,7 @@ _f void mVUinit(VURegs* vuRegsPtr, int vuIndex) { } // Resets Rec Data -_f void mVUreset(mV) { +__fi void mVUreset(mV) { // Clear All Program Data //memset(&mVU->prog, 0, sizeof(mVU->prog)); @@ -146,7 +146,7 @@ _f void mVUreset(mV) { } // Free Allocated Resources -_f void mVUclose(mV) { +__fi void mVUclose(mV) { if (mVU->dispCache) { HostSys::Munmap(mVU->dispCache, mVUdispCacheSize); mVU->dispCache = NULL; } if (mVU->cache) { HostSys::Munmap(mVU->cache, mVU->cacheSize); mVU->cache = NULL; } @@ -194,7 +194,7 @@ void mVUresizeCache(mV, u32 size) { } // Clears Block Data in specified range -_f void mVUclear(mV, u32 addr, u32 size) { +__fi void mVUclear(mV, u32 addr, u32 size) { if (!mVU->prog.cleared) { memzero(mVU->prog.lpState); // Clear pipeline state mVU->prog.cleared = 1; // Next execution searches/creates a new microprogram @@ -210,12 +210,12 @@ _f void mVUclear(mV, u32 addr, u32 size) { //------------------------------------------------------------------ // Finds and Ages/Kills Programs if they haven't been used in a while. 
-_f void mVUvsyncUpdate(mV) { +__fi void mVUvsyncUpdate(mV) { //mVU->prog.curFrame++; } // Deletes a program -_mVUt _f void mVUdeleteProg(microProgram*& prog) { +_mVUt __fi void mVUdeleteProg(microProgram*& prog) { microVU* mVU = mVUx; for (u32 i = 0; i < (mVU->progSize / 2); i++) { safe_delete(prog->block[i]); @@ -225,7 +225,7 @@ _mVUt _f void mVUdeleteProg(microProgram*& prog) { } // Creates a new Micro Program -_mVUt _f microProgram* mVUcreateProg(int startPC) { +_mVUt __fi microProgram* mVUcreateProg(int startPC) { microVU* mVU = mVUx; microProgram* prog = (microProgram*)_aligned_malloc(sizeof(microProgram), 64); memzero_ptr(prog); @@ -242,7 +242,7 @@ _mVUt _f microProgram* mVUcreateProg(int startPC) { } // Caches Micro Program -_mVUt _f void mVUcacheProg(microProgram& prog) { +_mVUt __fi void mVUcacheProg(microProgram& prog) { microVU* mVU = mVUx; if (!vuIndex) memcpy_const(prog.data, mVU->regs->Micro, 0x1000); else memcpy_const(prog.data, mVU->regs->Micro, 0x4000); @@ -250,7 +250,7 @@ _mVUt _f void mVUcacheProg(microProgram& prog) { } // Compare partial program by only checking compiled ranges... 
-_mVUt _f bool mVUcmpPartial(microProgram& prog) { +_mVUt __fi bool mVUcmpPartial(microProgram& prog) { microVU* mVU = mVUx; deque::const_iterator it(prog.ranges->begin()); for ( ; it != prog.ranges->end(); ++it) { @@ -263,7 +263,7 @@ _mVUt _f bool mVUcmpPartial(microProgram& prog) { } // Compare Cached microProgram to mVU->regs->Micro -_mVUt _f bool mVUcmpProg(microProgram& prog, const bool cmpWholeProg) { +_mVUt __fi bool mVUcmpProg(microProgram& prog, const bool cmpWholeProg) { microVU* mVU = mVUx; if ((cmpWholeProg && !memcmp_mmx((u8*)prog.data, mVU->regs->Micro, mVU->microMemSize)) || (!cmpWholeProg && mVUcmpPartial(prog))) { @@ -276,7 +276,7 @@ _mVUt _f bool mVUcmpProg(microProgram& prog, const bool cmpWholeProg) { } // Searches for Cached Micro Program and sets prog.cur to it (returns entry-point to program) -_mVUt _f void* mVUsearchProg(u32 startPC, uptr pState) { +_mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) { microVU* mVU = mVUx; microProgramQuick& quick = mVU->prog.quick[startPC/8]; microProgramList* list = mVU->prog.prog [startPC/8]; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index f85b29ea3a..cc08183049 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -78,7 +78,7 @@ public: } return thisBlock; } - __releaseinline microBlock* search(microRegInfo* pState) { + __ri microBlock* search(microRegInfo* pState) { microBlockLink* linkI = &blockList; if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) for (int i = 0; i <= listI; i++) { @@ -204,27 +204,27 @@ extern __aligned16 microVU microVU1; int mVUdebugNow = 0; // Main Functions -_f void mVUinit(VURegs*, int); -_f void mVUreset(mV); -_f void mVUclose(mV); -_f void mVUclear(mV, u32, u32); - void mVUresizeCache(mV, u32); -_f void* mVUblockFetch(microVU* mVU, u32 startPC, uptr pState); -_mVUt void* __fastcall mVUcompileJIT(u32 startPC, uptr pState); +extern void mVUinit(VURegs*, int); +extern void mVUreset(mV); +extern void mVUclose(mV); 
+extern void mVUclear(mV, u32, u32); +extern void mVUresizeCache(mV, u32); +extern void* mVUblockFetch(microVU* mVU, u32 startPC, uptr pState); +_mVUt extern void* __fastcall mVUcompileJIT(u32 startPC, uptr pState); // Prototypes for Linux -void __fastcall mVUcleanUpVU0(); -void __fastcall mVUcleanUpVU1(); +extern void __fastcall mVUcleanUpVU0(); +extern void __fastcall mVUcleanUpVU1(); mVUop(mVUopU); mVUop(mVUopL); // Private Functions -_mVUt _f void mVUcacheProg (microProgram& prog); -_mVUt _f void mVUdeleteProg(microProgram*& prog); -_mVUt _f void* mVUsearchProg(u32 startPC, uptr pState); -_mVUt _f microProgram* mVUfindLeastUsedProg(); -void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); -void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); +_mVUt extern void mVUcacheProg (microProgram& prog); +_mVUt extern void mVUdeleteProg(microProgram*& prog); +_mVUt extern void* mVUsearchProg(u32 startPC, uptr pState); +_mVUt extern microProgram* mVUfindLeastUsedProg(); +extern void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); +extern void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); // recCall Function Pointer typedef void (__fastcall *mVUrecCall)(u32, u32); diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index e9e8ee7c86..7f01b560e5 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -23,14 +23,14 @@ // Flag Allocators //------------------------------------------------------------------ -_f static const x32& getFlagReg(uint fInst) +__fi static const x32& getFlagReg(uint fInst) { static const x32* const gprF_crap[4] = { &gprF0, &gprF1, &gprF2, &gprF3 }; pxAssume(fInst < 4); return *gprF_crap[fInst]; } -_f void setBitSFLAG(const x32& reg, const x32& regT, int bitTest, int bitSet) +__fi void setBitSFLAG(const x32& reg, const x32& regT, int bitTest, int bitSet) { xTEST(regT, bitTest); xForwardJZ8 skip; @@ -38,7 +38,7 @@ _f void setBitSFLAG(const x32& reg, const x32& regT, int bitTest, int bitSet) 
skip.SetTarget(); } -_f void setBitFSEQ(const x32& reg, int bitX) +__fi void setBitFSEQ(const x32& reg, int bitX) { xTEST(reg, bitX); xForwardJump8 skip(Jcc_Zero); @@ -46,18 +46,18 @@ _f void setBitFSEQ(const x32& reg, int bitX) skip.SetTarget(); } -_f void mVUallocSFLAGa(const x32& reg, int fInstance) +__fi void mVUallocSFLAGa(const x32& reg, int fInstance) { xMOV(reg, getFlagReg(fInstance)); } -_f void mVUallocSFLAGb(const x32& reg, int fInstance) +__fi void mVUallocSFLAGb(const x32& reg, int fInstance) { xMOV(getFlagReg(fInstance), reg); } // Normalize Status Flag -_f void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance) +__ri void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance) { xXOR(reg, reg); mVUallocSFLAGa(regT, fInstance); @@ -71,7 +71,7 @@ _f void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance) } // Denormalizes Status Flag -_f void mVUallocSFLAGd(u32* memAddr, bool setAllflags) { +__ri void mVUallocSFLAGd(u32* memAddr, bool setAllflags) { // Cannot use EBP (gprF1) here; as this function is used by mVU0 macro and // the EErec needs EBP preserved. 
@@ -101,25 +101,25 @@ _f void mVUallocSFLAGd(u32* memAddr, bool setAllflags) { } } -_f void mVUallocMFLAGa(mV, const x32& reg, int fInstance) +__fi void mVUallocMFLAGa(mV, const x32& reg, int fInstance) { xMOVZX(reg, ptr16[&mVU->macFlag[fInstance]]); } -_f void mVUallocMFLAGb(mV, const x32& reg, int fInstance) +__fi void mVUallocMFLAGb(mV, const x32& reg, int fInstance) { //xAND(reg, 0xffff); if (fInstance < 4) xMOV(ptr32[&mVU->macFlag[fInstance]], reg); // microVU else xMOV(ptr32[&mVU->regs->VI[REG_MAC_FLAG].UL], reg); // macroVU } -_f void mVUallocCFLAGa(mV, const x32& reg, int fInstance) +__fi void mVUallocCFLAGa(mV, const x32& reg, int fInstance) { if (fInstance < 4) xMOV(reg, ptr32[&mVU->clipFlag[fInstance]]); // microVU else xMOV(reg, ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL]); // macroVU } -_f void mVUallocCFLAGb(mV, const x32& reg, int fInstance) +__fi void mVUallocCFLAGb(mV, const x32& reg, int fInstance) { if (fInstance < 4) xMOV(ptr32[&mVU->clipFlag[fInstance]], reg); // microVU else xMOV(ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL], reg); // macroVU @@ -129,7 +129,7 @@ _f void mVUallocCFLAGb(mV, const x32& reg, int fInstance) // VI Reg Allocators //------------------------------------------------------------------ -_f void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false) +__ri void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false) { if (!_reg_) xXOR(GPRreg, GPRreg); @@ -140,7 +140,7 @@ _f void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false) xMOVZX(GPRreg, ptr16[&mVU->regs->VI[_reg_].UL]); } -_f void mVUallocVIb(mV, const x32& GPRreg, int _reg_) +__ri void mVUallocVIb(mV, const x32& GPRreg, int _reg_) { if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) xMOVZX(gprT3, ptr16[&mVU->regs->VI[_reg_].UL]); @@ -154,19 +154,19 @@ _f void mVUallocVIb(mV, const x32& GPRreg, int _reg_) // P/Q Reg Allocators //------------------------------------------------------------------ -_f 
void getPreg(mV, const xmm& reg) +__fi void getPreg(mV, const xmm& reg) { mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -_f void getQreg(const xmm& reg, int qInstance) +__fi void getQreg(const xmm& reg, int qInstance) { mVUunpack_xyzw(reg, xmmPQ, qInstance); /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -_f void writeQreg(const xmm& reg, int qInstance) +__ri void writeQreg(const xmm& reg, int qInstance) { if (qInstance) { if (!x86caps.hasStreamingSIMD4Extensions) { diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 9880d382df..0bc65f3931 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -118,7 +118,7 @@ // FMAC1 - Normal FMAC Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { +__fi void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { sFLAG.doFlag = 1; analyzeReg1(Fs, mVUup.VF_read[0]); analyzeReg1(Ft, mVUup.VF_read[1]); @@ -129,7 +129,7 @@ _f void mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { // FMAC2 - ABS/FTOI/ITOF Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeFMAC2(mV, int Fs, int Ft) { +__fi void mVUanalyzeFMAC2(mV, int Fs, int Ft) { analyzeReg1(Fs, mVUup.VF_read[0]); analyzeReg2(Ft, mVUup.VF_write, 0); } @@ -138,7 +138,7 @@ _f void mVUanalyzeFMAC2(mV, int Fs, int Ft) { // FMAC3 - BC(xyzw) FMAC Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { +__fi void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { sFLAG.doFlag = 1; analyzeReg1(Fs, mVUup.VF_read[0]); analyzeReg3(Ft, mVUup.VF_read[1]); @@ -149,7 +149,7 @@ _f void mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { // FMAC4 - Clip FMAC Opcode //------------------------------------------------------------------ -_f void mVUanalyzeFMAC4(mV, int Fs, int Ft) { +__fi void 
mVUanalyzeFMAC4(mV, int Fs, int Ft) { cFLAG.doFlag = 1; analyzeReg1(Fs, mVUup.VF_read[0]); analyzeReg4(Ft, mVUup.VF_read[1]); @@ -159,20 +159,20 @@ _f void mVUanalyzeFMAC4(mV, int Fs, int Ft) { // IALU - IALU Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeIALU1(mV, int Id, int Is, int It) { +__fi void mVUanalyzeIALU1(mV, int Id, int Is, int It) { if (!Id) { mVUlow.isNOP = 1; } analyzeVIreg1(Is, mVUlow.VI_read[0]); analyzeVIreg1(It, mVUlow.VI_read[1]); analyzeVIreg2(Id, mVUlow.VI_write, 1); } -_f void mVUanalyzeIALU2(mV, int Is, int It) { +__fi void mVUanalyzeIALU2(mV, int Is, int It) { if (!It) { mVUlow.isNOP = 1; } analyzeVIreg1(Is, mVUlow.VI_read[0]); analyzeVIreg2(It, mVUlow.VI_write, 1); } -_f void mVUanalyzeIADDI(mV, int Is, int It, s16 imm) { +__fi void mVUanalyzeIADDI(mV, int Is, int It, s16 imm) { mVUanalyzeIALU2(mVU, Is, It); if (!Is) { setConstReg(It, imm); } } @@ -181,7 +181,7 @@ _f void mVUanalyzeIADDI(mV, int Is, int It, s16 imm) { // MR32 - MR32 Opcode //------------------------------------------------------------------ -_f void mVUanalyzeMR32(mV, int Fs, int Ft) { +__fi void mVUanalyzeMR32(mV, int Fs, int Ft) { if (!Ft) { mVUlow.isNOP = 1; } analyzeReg6(Fs, mVUlow.VF_read[0]); analyzeReg2(Ft, mVUlow.VF_write, 1); @@ -191,7 +191,7 @@ _f void mVUanalyzeMR32(mV, int Fs, int Ft) { // FDIV - DIV/SQRT/RSQRT Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { +__fi void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]); analyzeReg5(Ft, Ftf, mVUlow.VF_read[1]); analyzeQreg(xCycles); @@ -201,12 +201,12 @@ _f void mVUanalyzeFDIV(mV, int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { // EFU - EFU Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles) { +__fi void mVUanalyzeEFU1(mV, 
int Fs, int Fsf, u8 xCycles) { analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]); analyzePreg(xCycles); } -_f void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { +__fi void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { analyzeReg1(Fs, mVUlow.VF_read[0]); analyzePreg(xCycles); } @@ -215,7 +215,7 @@ _f void mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { // MFP - MFP Opcode //------------------------------------------------------------------ -_f void mVUanalyzeMFP(mV, int Ft) { +__fi void mVUanalyzeMFP(mV, int Ft) { if (!Ft) { mVUlow.isNOP = 1; } analyzeReg2(Ft, mVUlow.VF_write, 1); } @@ -224,7 +224,7 @@ _f void mVUanalyzeMFP(mV, int Ft) { // MOVE - MOVE Opcode //------------------------------------------------------------------ -_f void mVUanalyzeMOVE(mV, int Fs, int Ft) { +__fi void mVUanalyzeMOVE(mV, int Fs, int Ft) { if (!Ft || (Ft == Fs)) { mVUlow.isNOP = 1; } analyzeReg1(Fs, mVUlow.VF_read[0]); analyzeReg2(Ft, mVUlow.VF_write, 1); @@ -234,7 +234,7 @@ _f void mVUanalyzeMOVE(mV, int Fs, int Ft) { // LQx - LQ/LQD/LQI Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { +__fi void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { analyzeVIreg1(Is, mVUlow.VI_read[0]); analyzeReg2 (Ft, mVUlow.VF_write, 1); if (!Ft) { if (writeIs && Is) { mVUlow.noWriteVF = 1; } else { mVUlow.isNOP = 1; } } @@ -245,7 +245,7 @@ _f void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { // SQx - SQ/SQD/SQI Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { +__fi void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { analyzeReg1 (Fs, mVUlow.VF_read[0]); analyzeVIreg1(It, mVUlow.VI_read[0]); if (writeIt) { analyzeVIreg2(It, mVUlow.VI_write, 1); } @@ -255,12 +255,12 @@ _f void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { // R*** - R Reg Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeR1(mV, int Fs, int Fsf) 
{ +__fi void mVUanalyzeR1(mV, int Fs, int Fsf) { analyzeReg5(Fs, Fsf, mVUlow.VF_read[0]); analyzeRreg(); } -_f void mVUanalyzeR2(mV, int Ft, bool canBeNOP) { +__fi void mVUanalyzeR2(mV, int Ft, bool canBeNOP) { if (!Ft) { if (canBeNOP) { mVUlow.isNOP = 1; } else { mVUlow.noWriteVF = 1; } } analyzeReg2(Ft, mVUlow.VF_write, 1); analyzeRreg(); @@ -269,7 +269,7 @@ _f void mVUanalyzeR2(mV, int Ft, bool canBeNOP) { //------------------------------------------------------------------ // Sflag - Status Flag Opcodes //------------------------------------------------------------------ -_f void flagSet(mV, bool setMacFlag) { +__ri void flagSet(mV, bool setMacFlag) { int curPC = iPC; for (int i = mVUcount, j = 0; i > 0; i--, j++) { j += mVUstall; @@ -283,7 +283,7 @@ _f void flagSet(mV, bool setMacFlag) { iPC = curPC; } -_f void mVUanalyzeSflag(mV, int It) { +__ri void mVUanalyzeSflag(mV, int It) { mVUlow.readFlags = 1; analyzeVIreg2(It, mVUlow.VI_write, 1); if (!It) { mVUlow.isNOP = 1; } @@ -295,7 +295,7 @@ _f void mVUanalyzeSflag(mV, int It) { } } -_f void mVUanalyzeFSSET(mV) { +__ri void mVUanalyzeFSSET(mV) { mVUlow.isFSSET = 1; mVUlow.readFlags = 1; } @@ -304,7 +304,7 @@ _f void mVUanalyzeFSSET(mV) { // Mflag - Mac Flag Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeMflag(mV, int Is, int It) { +__ri void mVUanalyzeMflag(mV, int Is, int It) { mVUlow.readFlags = 1; analyzeVIreg1(Is, mVUlow.VI_read[0]); analyzeVIreg2(It, mVUlow.VI_write, 1); @@ -320,7 +320,7 @@ _f void mVUanalyzeMflag(mV, int Is, int It) { // Cflag - Clip Flag Opcodes //------------------------------------------------------------------ -_f void mVUanalyzeCflag(mV, int It) { +__fi void mVUanalyzeCflag(mV, int It) { mVUinfo.swapOps = 1; mVUlow.readFlags = 1; if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 4; } @@ -331,7 +331,7 @@ _f void mVUanalyzeCflag(mV, int It) { // XGkick //------------------------------------------------------------------ -_f void 
mVUanalyzeXGkick(mV, int Fs, int xCycles) { +__fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) { analyzeVIreg1(Fs, mVUlow.VI_read[0]); analyzeXGkick1(); analyzeXGkick2(xCycles); @@ -347,7 +347,7 @@ _f void mVUanalyzeXGkick(mV, int Fs, int xCycles) { // Branches - Branch Opcodes //------------------------------------------------------------------ -_f void analyzeBranchVI(mV, int xReg, bool &infoVar) { +static void analyzeBranchVI(mV, int xReg, bool &infoVar) { if (!xReg) return; int i, j = 0; int iEnd = 4; @@ -390,7 +390,7 @@ _f void analyzeBranchVI(mV, int xReg, bool &infoVar) { /* // Dead Code... the old version of analyzeBranchVI() -_f void analyzeBranchVI(mV, int xReg, bool &infoVar) { +__fi void analyzeBranchVI(mV, int xReg, bool &infoVar) { if (!xReg) return; int i; int iEnd = aMin(5, (mVUcount+1)); @@ -427,7 +427,7 @@ _f void analyzeBranchVI(mV, int xReg, bool &infoVar) { */ // Branch in Branch Delay-Slots -_f int mVUbranchCheck(mV) { +__ri int mVUbranchCheck(mV) { if (!mVUcount) return 0; incPC(-2); if (mVUlow.branch) { @@ -443,14 +443,14 @@ _f int mVUbranchCheck(mV) { return 0; } -_f void mVUanalyzeCondBranch1(mV, int Is) { +__fi void mVUanalyzeCondBranch1(mV, int Is) { analyzeVIreg1(Is, mVUlow.VI_read[0]); if (!mVUstall && !mVUbranchCheck(mVU)) { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); } } -_f void mVUanalyzeCondBranch2(mV, int Is, int It) { +__fi void mVUanalyzeCondBranch2(mV, int Is, int It) { analyzeVIreg1(Is, mVUlow.VI_read[0]); analyzeVIreg1(It, mVUlow.VI_read[1]); if (!mVUstall && !mVUbranchCheck(mVU)) { @@ -459,7 +459,7 @@ _f void mVUanalyzeCondBranch2(mV, int Is, int It) { } } -_f void mVUanalyzeNormBranch(mV, int It, bool isBAL) { +__fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) { mVUbranchCheck(mVU); if (isBAL) { analyzeVIreg2(It, mVUlow.VI_write, 1); @@ -467,7 +467,7 @@ _f void mVUanalyzeNormBranch(mV, int It, bool isBAL) { } } -_f void mVUanalyzeJump(mV, int Is, int It, bool isJALR) { +__ri void mVUanalyzeJump(mV, int Is, int It, 
bool isJALR) { mVUbranchCheck(mVU); mVUlow.branch = (isJALR) ? 10 : 9; if (mVUconstReg[Is].isValid && CHECK_VU_CONSTPROP) { diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 0686a2e3af..16e1cc4097 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -31,19 +31,19 @@ // Messages Called at Execution Time... //------------------------------------------------------------------ -void __fastcall mVUbadOp0(mV) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); } -void __fastcall mVUbadOp1(mV) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); } -void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); } -void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); } -void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); } -void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); } +static void __fastcall mVUbadOp0(mV) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); } +static void __fastcall mVUbadOp1(mV) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. 
[%04x] [%x]", xPC, mVU->prog.cur); } +static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); } +static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); } +static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); } +static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); } //------------------------------------------------------------------ // Helper Functions //------------------------------------------------------------------ // Used by mVUsetupRange -_f void mVUcheckIsSame(mV) { +static __fi void mVUcheckIsSame(mV) { if (mVU->prog.isSame == -1) { mVU->prog.isSame = !memcmp_mmx((u8*)mVUcurProg.data, mVU->regs->Micro, mVU->microMemSize); } @@ -55,7 +55,7 @@ _f void mVUcheckIsSame(mV) { } // Sets up microProgram PC ranges based on whats been recompiled -void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) { +static void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) { deque*& ranges = mVUcurProg.ranges; pc &= mVU->microMemSize - 8; @@ -106,7 +106,7 @@ void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) { } } -_f void startLoop(mV) { +static __fi void startLoop(mV) { if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); } if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); } if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); } @@ -114,7 +114,7 @@ _f void startLoop(mV) { memzero(mVUregsTemp); } -void doIbit(mV) { +static void doIbit(mV) { if (mVUup.iBit) { incPC(-1); u32 tempI; @@ -131,7 +131,7 @@ void doIbit(mV) { } } -void doSwapOp(mV) { +static void doSwapOp(mV) { if (mVUinfo.backupVF && !mVUlow.noWriteVF) { DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC); @@ -161,7 +161,7 
@@ void doSwapOp(mV) { } // If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2) -_f void mVUcheckBadOp(mV) { +static __fi void mVUcheckBadOp(mV) { if (mVUinfo.isBadOp && mVUcount == 0) { mVUinfo.isEOB = true; Console.Warning("microVU Warning: First Instruction of block contains illegal opcode..."); @@ -169,7 +169,7 @@ _f void mVUcheckBadOp(mV) { } // Prints msg when exiting block early if 1st op was a bad opcode (Dawn of Mana Level 2) -_f void handleBadOp(mV, int count) { +static __fi void handleBadOp(mV, int count) { if (mVUinfo.isBadOp && count == 0) { xMOV(gprT2, (uptr)mVU); if (!isVU1) xCALL(mVUbadOp0); @@ -177,7 +177,7 @@ _f void handleBadOp(mV, int count) { } } -_f void branchWarning(mV) { +static __ri void branchWarning(mV) { incPC(-2); if (mVUup.eBit && mVUbranch) { incPC(2); @@ -193,14 +193,14 @@ _f void branchWarning(mV) { } } -_f void eBitPass1(mV, int& branch) { +static __fi void eBitPass1(mV, int& branch) { if (mVUregs.blockType != 1) { branch = 1; mVUup.eBit = 1; } } -_f void eBitWarning(mV) { +static __ri void eBitWarning(mV) { if (mVUpBlock->pState.blockType == 1) Console.Error("microVU%d Warning: Branch, E-bit, Branch! [%04x]", mVU->index, xPC); if (mVUpBlock->pState.blockType == 2) Console.Error("microVU%d Warning: Branch, Branch, Branch! 
[%04x]", mVU->index, xPC); incPC(2); @@ -212,7 +212,7 @@ _f void eBitWarning(mV) { } // Optimizes the End Pipeline State Removing Unnecessary Info -_f void mVUoptimizePipeState(mV) { +static __fi void mVUoptimizePipeState(mV) { for (int i = 0; i < 32; i++) { optimizeReg(mVUregs.VF[i].x); optimizeReg(mVUregs.VF[i].y); @@ -227,7 +227,7 @@ _f void mVUoptimizePipeState(mV) { mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info } -_f void mVUincCycles(mV, int x) { +static __fi void mVUincCycles(mV, int x) { mVUcycles += x; for (int z = 31; z > 0; z--) { calcCycles(mVUregs.VF[z].x, x); @@ -300,12 +300,12 @@ void mVUsetCycles(mV) { } // vu0 is allowed to exit early, so are dev builds (for inf loops) -_f bool doEarlyExit(microVU* mVU) { +static __fi bool doEarlyExit(microVU* mVU) { return IsDevBuild || !isVU1; } // Saves Pipeline State for resuming from early exits -_f void mVUsavePipelineState(microVU* mVU) { +static __fi void mVUsavePipelineState(microVU* mVU) { u32* lpS = (u32*)&mVU->prog.lpState.vi15; for (int i = 0; i < (sizeof(microRegInfo)-4)/4; i++, lpS++) { xMOV(ptr32[lpS], lpS[0]); @@ -313,7 +313,7 @@ _f void mVUsavePipelineState(microVU* mVU) { } // Prints Start/End PC of blocks executed, for debugging... -void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) { +static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) { if (mVUdebugNow) { xMOV(gprT2, xPC); if (isEndPC) xCALL(mVUprintPC2); @@ -322,7 +322,7 @@ void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) { } // Test cycles to see if we need to exit-early... 
-void mVUtestCycles(microVU* mVU) { +static void mVUtestCycles(microVU* mVU) { iPC = mVUstartPC; if (doEarlyExit(mVU)) { xCMP(ptr32[&mVU->cycles], 0); @@ -348,7 +348,7 @@ void mVUtestCycles(microVU* mVU) { } // Initialize VI Constants (vi15 propagates through blocks) -_f void mVUinitConstValues(microVU* mVU) { +static __fi void mVUinitConstValues(microVU* mVU) { for (int i = 0; i < 16; i++) { mVUconstReg[i].isValid = 0; mVUconstReg[i].regValue = 0; @@ -358,7 +358,7 @@ _f void mVUinitConstValues(microVU* mVU) { } // Initialize Variables -_f void mVUinitFirstPass(microVU* mVU, uptr pState, u8* thisPtr) { +static __fi void mVUinitFirstPass(microVU* mVU, uptr pState, u8* thisPtr) { mVUstartPC = iPC; // Block Start PC mVUbranch = 0; // Branch Type mVUcount = 0; // Number of instructions ran @@ -466,14 +466,14 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) { } // Returns the entry point of the block (compiles it if not found) -_f void* mVUentryGet(microVU* mVU, microBlockManager* block, u32 startPC, uptr pState) { +__fi void* mVUentryGet(microVU* mVU, microBlockManager* block, u32 startPC, uptr pState) { microBlock* pBlock = block->search((microRegInfo*)pState); if (pBlock) return pBlock->x86ptrStart; else return mVUcompile(mVU, startPC, pState); } // Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr) -_f void* mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) { +__fi void* mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) { if (startPC > mVU->microMemSize-8) { DevCon.Error("microVU%d: invalid startPC [%04x]", mVU->index, startPC); } startPC &= mVU->microMemSize-8; diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index 184a3d3210..5519f5af1f 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -19,7 +19,7 @@ #pragma once // Sets FDIV Flags at the proper time -_f void mVUdivSet(mV) { +__fi void mVUdivSet(mV) { if (mVUinfo.doDivFlag) { if (!sFLAG.doFlag) { 
xMOV(getFlagReg(sFLAG.write), getFlagReg(sFLAG.lastWrite)); } xAND(getFlagReg(sFLAG.write), 0xfff3ffff); @@ -29,7 +29,7 @@ _f void mVUdivSet(mV) { // Optimizes out unneeded status flag updates // This can safely be done when there is an FSSET opcode -_f void mVUstatusFlagOp(mV) { +__fi void mVUstatusFlagOp(mV) { int curPC = iPC; int i = mVUcount; bool runLoop = 1; @@ -77,7 +77,7 @@ int sortFlag(int* fFlag, int* bFlag, int cycles) { #define sHackCond (mVUsFlagHack && !sFLAG.doNonSticky) // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! -_f void mVUsetFlags(mV, microFlagCycles& mFC) { +__fi void mVUsetFlags(mV, microFlagCycles& mFC) { int endPC = iPC; u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking @@ -164,7 +164,7 @@ _f void mVUsetFlags(mV, microFlagCycles& mFC) { #define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0]) // Recompiles Code for Proper Flags on Block Linkings -_f void mVUsetupFlags(mV, microFlagCycles& mFC) { +__fi void mVUsetupFlags(mV, microFlagCycles& mFC) { if (__Status) { int bStatus[4]; @@ -283,7 +283,7 @@ void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) { #define branchType3 else // Conditional Branch // Checks if the first ~4 instructions of a block will read flags -_f void mVUsetFlagInfo(mV) { +__fi void mVUsetFlagInfo(mV) { branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); } branchType2 { // This case can possibly be turned off via a hack for a small speedup... 
if (!mVUlow.constJump.isValid || !CHECK_VU_CONSTPROP) { mVUregs.needExactMatch |= 0x7; } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 1e40e9a557..3fcd9b9d84 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -24,7 +24,7 @@ //------------------------------------------------------------------ // Test if Vector is +/- Zero -_f static void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTemp) +static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTemp) { xXOR.PS(xmmTemp, xmmTemp); xCMPEQ.SS(xmmTemp, xmmReg); @@ -36,7 +36,7 @@ _f static void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTem } // Test if Vector is Negative (Set Flags and Makes Positive) -_f static void testNeg(mV, const xmm& xmmReg, const x32& gprTemp) +static __fi void testNeg(mV, const xmm& xmmReg, const x32& gprTemp) { xMOVMSKPS(gprTemp, xmmReg); xTEST(gprTemp, 1); @@ -156,7 +156,7 @@ mVUop(mVU_RSQRT) { } // ToDo: Can Be Optimized Further? 
(takes approximately (~115 cycles + mem access time) on a c2d) -_f static void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) { +static __fi void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) { xMOVSS(PQ, Fs); xMUL.SS(PQ, ptr32[mVUglob.T1]); xMOVAPS(t2, Fs); @@ -272,7 +272,7 @@ mVUop(mVU_EEXP) { } // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2 -_f void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) { +static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) { if( x86caps.hasStreamingSIMD4Extensions ) { xDP.PS(Fs, Fs, 0x71); xMOVSS(PQ, Fs); @@ -995,7 +995,7 @@ mVUop(mVU_RINIT) { pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); } } -_f void mVU_RGET_(mV, const x32& Rreg) { +static __fi void mVU_RGET_(mV, const x32& Rreg) { if (!mVUlow.noWriteVF) { const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); xMOVDZX(Ft, Rreg); @@ -1139,7 +1139,7 @@ void __fastcall mVU_XGKICK_(u32 addr) { } } -_f void mVU_XGKICK_DELAY(mV, bool memVI) { +static __fi void mVU_XGKICK_DELAY(mV, bool memVI) { mVUbackupRegs(mVU); if (memVI) xMOV(gprT2, ptr32[&mVU->VIxgkick]); else mVUallocVIa(mVU, gprT2, _Is_); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 0a35cee49b..1e43c41bad 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -127,7 +127,7 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst; // Recursive Inline #ifndef __LINUX__ -#define __recInline __releaseinline +#define __recInline __ri #else #define __recInline inline #endif @@ -209,7 +209,6 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); #define Rmem &mVU->regs->VI[REG_R].UL #define aWrap(x, m) ((x > m) ? 
0 : x) #define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4)))) -#define _1mb (0x100000) #define clampE CHECK_VU_EXTRA_OVERFLOW #define elif else if diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index b4c503fdb6..54c36d3a61 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -214,7 +214,7 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) //------------------------------------------------------------------ // Transforms the Address in gprReg to valid VU0/VU1 Address -_f void mVUaddrFix(mV, const x32& gprReg) +__fi void mVUaddrFix(mV, const x32& gprReg) { if (isVU1) { xAND(gprReg, 0x3ff); // wrap around @@ -233,14 +233,14 @@ _f void mVUaddrFix(mV, const x32& gprReg) } // Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI) -_f void mVUbackupRegs(microVU* mVU) +__fi void mVUbackupRegs(microVU* mVU) { mVU->regAlloc->flushAll(); xMOVAPS(ptr128[mVU->xmmPQb], xmmPQ); } // Restore Volatile Regs -_f void mVUrestoreRegs(microVU* mVU) +__fi void mVUrestoreRegs(microVU* mVU) { xMOVAPS(xmmPQ, ptr128[mVU->xmmPQb]); } diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 5f6cfd5853..fa473e0980 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -114,7 +114,7 @@ enum clampModes { }; // Prints Opcode to MicroProgram Logs -void mVU_printOP(microVU* mVU, int opCase, const char* opName, bool isACC) { +static void mVU_printOP(microVU* mVU, int opCase, const char* opName, bool isACC) { mVUlog(opName); opCase1 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogFt(); } opCase2 { if (isACC) { mVUlogACC(); } else { mVUlogFd(); } mVUlogBC(); } @@ -123,7 +123,7 @@ void mVU_printOP(microVU* mVU, int opCase, const char* opName, bool isACC) { } // Sets Up Pass1 Info for Normal, BC, I, and Q Cases -void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) { +static void 
setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) { opCase1 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); } opCase2 { mVUanalyzeFMAC3(mVU, ((isACC) ? 0 : _Fd_), _Fs_, _Ft_); } opCase3 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } @@ -132,7 +132,7 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) { } // Safer to force 0 as the result for X minus X than to do actual subtraction -bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) { +static bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) { opCase1 { if ((opType == 1) && (_Ft_ == _Fs_)) { const xmm& Fs = mVU->regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W); @@ -146,7 +146,7 @@ bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) { } // Sets Up Ft Reg for Normal, BC, I, and Q Cases -void setupFtReg(microVU* mVU, xmm& Ft, xmm& tempFt, int opCase) { +static void setupFtReg(microVU* mVU, xmm& Ft, xmm& tempFt, int opCase) { opCase1 { if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } else if (clampE) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; } @@ -167,7 +167,7 @@ void setupFtReg(microVU* mVU, xmm& Ft, xmm& tempFt, int opCase) { } // Normal FMAC Opcodes -void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, const char* opName, int clampType) { +static void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, const char* opName, int clampType) { pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); } pass2 { if (doSafeSub(mVU, opCase, opType, isACC)) return; @@ -205,7 +205,7 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co } // MADDA/MSUBA Opcodes -void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* opName, int clampType) { +static void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* opName, int clampType) { pass1 { 
setupPass1(mVU, opCase, 1, 0); } pass2 { xmm Fs, Ft, ACC, tempFt; @@ -246,7 +246,7 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op } // MADD Opcodes -void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName, int clampType) { +static void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName, int clampType) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { xmm Fs, Ft, ACC, tempFt; @@ -277,7 +277,7 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName, int cl } // MSUB Opcodes -void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName, int clampType) { +static void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName, int clampType) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { xmm Fs, Ft, Fd, tempFt; diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index 3df7d9fca4..9e8a64fa74 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -21,10 +21,8 @@ #include "x86emitter/x86emitter.h" using namespace x86Emitter; -static const s64 _1mb = 0x100000; #define aMax(x, y) std::max(x,y) #define aMin(x, y) std::min(x,y) -#define _f __forceinline // newVif_HashBucket.h uses this typedef, so it has to be declared first. 
typedef u32 (__fastcall *nVifCall)(void*, const void*); diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index f13ee980f0..8362ffa78f 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -64,7 +64,7 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \ } -_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const { +__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const { u32 m0 = vB.mask; u32 m1 = m0 & 0xaaaaaaaa; u32 m2 =(~m1>>1) & m0; @@ -194,7 +194,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { xRET(); } -static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) { +static __fi u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) { u8* endPtr; // Check if we need to wrap around VU memory u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit)); if (!isFill) { // Account for skip-cycles @@ -217,7 +217,7 @@ static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) { // [TODO] : Finish implementing support for VIF's growable recBlocks buffer. Currently // it clears the buffer only. -static _f void dVifRecLimit(int idx) { +static __fi void dVifRecLimit(int idx) { if (nVif[idx].recPtr > nVif[idx].recEnd) { DevCon.WriteLn("nVif Rec - Out of Rec Cache! 
[%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd); nVif[idx].vifBlocks->clear(); diff --git a/pcsx2/x86/newVif_HashBucket.h b/pcsx2/x86/newVif_HashBucket.h index 66dfa14613..90c0202689 100644 --- a/pcsx2/x86/newVif_HashBucket.h +++ b/pcsx2/x86/newVif_HashBucket.h @@ -61,7 +61,7 @@ public: int quickFind(u32 data) { return mBucket[data % hSize].Size; } - __forceinline T* find(T* dataPtr) { + __fi T* find(T* dataPtr) { u32 d = *((u32*)dataPtr); const SizeChain& bucket( mBucket[d % hSize] ); @@ -77,7 +77,7 @@ public: if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size ); return NULL; } - __forceinline void add(const T& dataPtr) { + __fi void add(const T& dataPtr) { u32 d = (u32&)dataPtr; SizeChain& bucket( mBucket[d % hSize] ); diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index 31aa5e9304..d3c695aa2c 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -48,7 +48,7 @@ __aligned16 const u8 nVifT[16] = { // ---------------------------------------------------------------------------- template< int idx, bool doMode, bool isFill, bool singleUnpack > -__releaseinline void __fastcall _nVifUnpackLoop(const u8 *data, u32 size); +__ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size); typedef void __fastcall FnType_VifUnpackLoop(const u8 *data, u32 size); typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop; @@ -91,11 +91,11 @@ void closeNewVif(int idx) { if (newVifDynaRec) dVifClose(idx); } -static _f u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) { +static __fi u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) { return (u8*)(vuMemBase + ( offset & (vuidx ? 
0x3ff0 : 0xff0) )); } -static _f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) { +static __fi void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) { pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check ptr += amount; vif->tag.addr += amount; @@ -105,7 +105,7 @@ static _f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) { } } -static _f void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) { +static __fi void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) { pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check ptr += 16; vif->tag.addr += 16; @@ -197,7 +197,7 @@ static void setMasks(int idx, const VIFregisters& v) { // "slow" games that need it most). --air template< int idx, bool doMode, bool isFill, bool singleUnpack > -__releaseinline void __fastcall _nVifUnpackLoop(const u8 *data, u32 size) { +__ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size) { const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl; const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl; @@ -250,7 +250,7 @@ __releaseinline void __fastcall _nVifUnpackLoop(const u8 *data, u32 size) { } } -_f void _nVifUnpack(int idx, const u8 *data, u32 size, bool isFill) { +__fi void _nVifUnpack(int idx, const u8 *data, u32 size, bool isFill) { if (useOldUnpack) { if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);