From c56ca4a0fad6ccef8a659f90ac3b65f9bdd6ffaf Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 16 Jul 2016 16:40:29 +0200 Subject: [PATCH 1/3] common: include cpuid.h on unix I tested both clang and gcc. --- common/include/Pcsx2Defs.h | 6 +----- common/include/intrin_x86.h | 10 ---------- common/include/x86emitter/x86_intrin.h | 21 +++++++++++++++++++++ common/src/x86emitter/cpudetect.cpp | 18 +++++++++--------- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index 862068d845..bba17b0c23 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -33,11 +33,7 @@ #include "Pcsx2Types.h" -#ifdef _MSC_VER -# include <intrin.h> -#else -# include <intrin_x86.h> -#endif +#include "x86emitter/x86_intrin.h" // Renamed ARRAYSIZE to ArraySize -- looks nice and gets rid of Windows.h conflicts (air) // Notes: I'd have used ARRAY_SIZE instead but ran into cross-platform lib conflicts with diff --git a/common/include/intrin_x86.h b/common/include/intrin_x86.h index 6b83a2928e..d9342a9d84 100644 --- a/common/include/intrin_x86.h +++ b/common/include/intrin_x86.h @@ -69,16 +69,6 @@ static __inline__ __attribute__((always_inline)) s64 _InterlockedExchange64(vola } /*** System information ***/ -static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], const int InfoType) -{ - __asm__ __volatile__("cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType)); -} - -static __inline__ __attribute__((always_inline)) void __cpuidex(int CPUInfo[], const int level, const int count) -{ - __asm__ __volatile__("cpuid": "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (level), "c" (count)); -} - static __inline__ __attribute__((always_inline)) unsigned long long _xgetbv(unsigned int index) { unsigned int eax, edx; diff --git a/common/include/x86emitter/x86_intrin.h b/common/include/x86emitter/x86_intrin.h index 572f70759c..c0c0a59477 
100644 --- a/common/include/x86emitter/x86_intrin.h +++ b/common/include/x86emitter/x86_intrin.h @@ -31,6 +31,27 @@ #endif +// CPU information support +#if defined(_WIN32) + +#define cpuid __cpuid +#define cpuidex __cpuidex + +#else + +#include <cpuid.h> + +static __inline__ __attribute__((always_inline)) void cpuidex(int CPUInfo[], const int InfoType, const int count) { + __cpuid_count(InfoType, count, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +} + +static __inline__ __attribute__((always_inline)) void cpuid(int CPUInfo[], const int InfoType) { + __cpuid(InfoType, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); +} + +#endif + +// Rotate instruction #if defined(__clang__) // Seriously what is so complicated to provided this bunch of intrinsics in clangs. static unsigned int _rotr(unsigned int x, int s) diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp index 38f506dba2..b6eb60bf84 100644 --- a/common/src/x86emitter/cpudetect.cpp +++ b/common/src/x86emitter/cpudetect.cpp @@ -149,7 +149,7 @@ void x86capabilities::CountCores() s32 regs[ 4 ]; u32 cmds; - __cpuid( regs, 0x80000000 ); + cpuid( regs, 0x80000000 ); cmds = regs[ 0 ]; // detect multicore for AMD cpu @@ -191,7 +191,7 @@ void x86capabilities::Identify() #endif memzero( VendorName ); - __cpuid( regs, 0 ); + cpuid( regs, 0 ); cmds = regs[ 0 ]; ((u32*)VendorName)[ 0 ] = regs[ 1 ]; @@ -211,7 +211,7 @@ void x86capabilities::Identify() if ( cmds >= 0x00000001 ) { - __cpuid( regs, 0x00000001 ); + cpuid( regs, 0x00000001 ); StepID = regs[ 0 ] & 0xf; Model = (regs[ 0 ] >> 4) & 0xf; @@ -227,16 +227,16 @@ void x86capabilities::Identify() if ( cmds >= 0x00000007 ) { // Note: ECX must be 0 for AVX2 detection. 
- __cpuidex( regs, 0x00000007, 0 ); + cpuidex( regs, 0x00000007, 0 ); SEFlag = regs[ 1 ]; } - __cpuid( regs, 0x80000000 ); + cpuid( regs, 0x80000000 ); cmds = regs[ 0 ]; if ( cmds >= 0x80000001 ) { - __cpuid( regs, 0x80000001 ); + cpuid( regs, 0x80000001 ); #ifdef __x86_64__ x86_64_12BITBRANDID = regs[1] & 0xfff; @@ -246,9 +246,9 @@ void x86capabilities::Identify() } memzero( FamilyName ); - __cpuid( (int*)FamilyName, 0x80000002); - __cpuid( (int*)(FamilyName+16), 0x80000003); - __cpuid( (int*)(FamilyName+32), 0x80000004); + cpuid( (int*)FamilyName, 0x80000002); + cpuid( (int*)(FamilyName+16), 0x80000003); + cpuid( (int*)(FamilyName+32), 0x80000004); hasFloatingPointUnit = ( Flags >> 0 ) & 1; hasVirtual8086ModeEnhancements = ( Flags >> 1 ) & 1; From e872552fdce578f28abdf1fef0b4677ecf71886a Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 16 Jul 2016 17:25:37 +0200 Subject: [PATCH 2/3] spu2x|common: s/jASSUME/pxAssume/ allow to remove code and __debugbreak intrinsic --- common/include/Pcsx2Defs.h | 27 ------------------- common/include/Utilities/lnx_memzero.h | 2 +- common/include/intrin_x86.h | 8 ------ plugins/spu2-x/src/ADSR.cpp | 2 +- plugins/spu2-x/src/Linux/Alsa.cpp | 4 +-- plugins/spu2-x/src/Mixer.cpp | 6 ++--- plugins/spu2-x/src/PS2E-spu2.cpp | 4 +-- plugins/spu2-x/src/SndOut.cpp | 4 +-- plugins/spu2-x/src/Timestretcher.cpp | 2 +- plugins/spu2-x/src/Windows/SndOut_DSound.cpp | 2 +- plugins/spu2-x/src/Windows/SndOut_XAudio2.cpp | 2 +- .../spu2-x/src/Windows/SndOut_XAudio2_27.cpp | 4 +-- plugins/spu2-x/src/Windows/SndOut_waveOut.cpp | 2 +- plugins/spu2-x/src/spu2sys.cpp | 6 ++--- 14 files changed, 20 insertions(+), 55 deletions(-) diff --git a/common/include/Pcsx2Defs.h b/common/include/Pcsx2Defs.h index bba17b0c23..6f4bd4ec5a 100644 --- a/common/include/Pcsx2Defs.h +++ b/common/include/Pcsx2Defs.h @@ -42,33 +42,6 @@ # define ArraySize(x) (sizeof(x)/sizeof((x)[0])) #endif -// 
-------------------------------------------------------------------------------------- -// jASSUME - give hints to the optimizer [obsolete, use pxAssume() instead] -// -------------------------------------------------------------------------------------- -// This is primarily useful for the default case switch optimizer, which enables VC to -// generate more compact switches. -// -// Note: When using the PCSX2 Utilities library, this is deprecated. Use pxAssert instead, -// which itself optimizes to an __assume() hint in release mode builds. -// -#ifndef jASSUME -# ifdef NDEBUG -# define jBREAKPOINT() ((void) 0) -# ifdef _MSC_VER -# define jASSUME(exp) (__assume(exp)) -# else -# define jASSUME(exp) do { if(!(exp)) __builtin_unreachable(); } while(0) -# endif -# else -# define jBREAKPOINT() __debugbreak(); -# ifdef wxASSERT -# define jASSUME(exp) wxASSERT(exp) -# else -# define jASSUME(exp) do { if(!(exp)) jBREAKPOINT(); } while(0) -# endif -# endif -#endif - // -------------------------------------------------------------------------------------- // Dev / Debug conditionals - Consts for using if() statements instead of uglier #ifdef. // -------------------------------------------------------------------------------------- diff --git a/common/include/Utilities/lnx_memzero.h b/common/include/Utilities/lnx_memzero.h index 9c609dc74a..52ca0abab2 100644 --- a/common/include/Utilities/lnx_memzero.h +++ b/common/include/Utilities/lnx_memzero.h @@ -25,7 +25,7 @@ static __fi void memset32( T& obj ) // If the data length is not a factor of 32 bits, the C++ optimizing compiler will // probably just generate mysteriously broken code in Release builds. 
;) - jASSUME( (sizeof(T) & 0x3) == 0 ); + pxAssume((sizeof(T) & 0x3) == 0); u32* dest = (u32*)&obj; for( int i=sizeof(T)>>2; i; --i, ++dest ) diff --git a/common/include/intrin_x86.h b/common/include/intrin_x86.h index d9342a9d84..320b336470 100644 --- a/common/include/intrin_x86.h +++ b/common/include/intrin_x86.h @@ -75,11 +75,3 @@ static __inline__ __attribute__((always_inline)) unsigned long long _xgetbv(unsi __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); return ((unsigned long long)edx << 32) | eax; } - -/*** Interrupts ***/ -#ifndef __linux__ -static __inline__ __attribute__((always_inline)) void __debugbreak(void) -{ - __asm__("int $3"); -} -#endif diff --git a/plugins/spu2-x/src/ADSR.cpp b/plugins/spu2-x/src/ADSR.cpp index 38ffbbf923..b0012fb729 100644 --- a/plugins/spu2-x/src/ADSR.cpp +++ b/plugins/spu2-x/src/ADSR.cpp @@ -54,7 +54,7 @@ static int GetLinearSrAr( uint SrAr ) bool V_ADSR::Calculate() { - jASSUME( Phase != 0 ); + pxAssume( Phase != 0 ); if(Releasing && (Phase < 5)) Phase = 5; diff --git a/plugins/spu2-x/src/Linux/Alsa.cpp b/plugins/spu2-x/src/Linux/Alsa.cpp index 7a67c7501a..107c6e89e7 100644 --- a/plugins/spu2-x/src/Linux/Alsa.cpp +++ b/plugins/spu2-x/src/Linux/Alsa.cpp @@ -69,8 +69,8 @@ protected: fprintf(stderr,"* SPU2-X:Iz in your external callback.\n"); AlsaMod *data = (AlsaMod*)snd_async_handler_get_callback_private( pcm_call ); - jASSUME( data != NULL ); - //jASSUME( data->handle == snd_async_handler_get_pcm(pcm_call) ); + pxAssume( data != NULL ); + //pxAssume( data->handle == snd_async_handler_get_pcm(pcm_call) ); // Not sure if we just need an assert, or something like this: if (data->handle != snd_async_handler_get_pcm(pcm_call)) diff --git a/plugins/spu2-x/src/Mixer.cpp b/plugins/spu2-x/src/Mixer.cpp index 32f71aac0a..6a8ae2ceff 100644 --- a/plugins/spu2-x/src/Mixer.cpp +++ b/plugins/spu2-x/src/Mixer.cpp @@ -398,7 +398,7 @@ static __forceinline void CalculateADSR( V_Core& thiscore, uint voiceidx ) vc.Stop(); } 
- jASSUME( vc.ADSR.Value >= 0 ); // ADSR should never be negative... + pxAssume( vc.ADSR.Value >= 0 ); // ADSR should never be negative... } /* @@ -529,7 +529,7 @@ static __forceinline s32 GetNoiseValues( V_Core& thiscore, uint voiceidx ) // GetNoiseValues can't set the phase zero on us unexpectedly // like GetVoiceValues can. Better assert just in case though.. - jASSUME( vc.ADSR.Phase != 0 ); + pxAssume( vc.ADSR.Phase != 0 ); return retval; } @@ -554,7 +554,7 @@ static __forceinline void spu2M_WriteFast( u32 addr, s16 value ) } // throw an assertion if the memory range is invalid: #ifndef DEBUG_FAST - jASSUME( addr < SPU2_DYN_MEMLINE ); + pxAssume( addr < SPU2_DYN_MEMLINE ); #endif *GetMemPtr( addr ) = value; } diff --git a/plugins/spu2-x/src/PS2E-spu2.cpp b/plugins/spu2-x/src/PS2E-spu2.cpp index d7432b1f1e..9bf4e15096 100644 --- a/plugins/spu2-x/src/PS2E-spu2.cpp +++ b/plugins/spu2-x/src/PS2E-spu2.cpp @@ -671,7 +671,7 @@ EXPORT_C_(int) SPU2setupRecording(int start, void* pData) EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data) { - jASSUME( data != NULL ); + pxAssume( data != NULL ); if ( !data ) { printf("SPU2-X savestate null pointer!\n"); @@ -684,7 +684,7 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data) return 0; } - jASSUME( mode == FREEZE_LOAD || mode == FREEZE_SAVE ); + pxAssume( mode == FREEZE_LOAD || mode == FREEZE_SAVE ); if( data->data == NULL ) { diff --git a/plugins/spu2-x/src/SndOut.cpp b/plugins/spu2-x/src/SndOut.cpp index 2359db0487..2dc18268b8 100644 --- a/plugins/spu2-x/src/SndOut.cpp +++ b/plugins/spu2-x/src/SndOut.cpp @@ -262,8 +262,8 @@ template<typename T> void SndBuffer::ReadSamples(T* bData) int quietSamples; if( CheckUnderrunStatus( nSamples, quietSamples ) ) { - jASSUME( nSamples <= SndOutPacketSize ); - + pxAssume( nSamples <= SndOutPacketSize ); + // WARNING: This code assumes there's only ONE reading process. 
int b1 = m_size - m_rpos; diff --git a/plugins/spu2-x/src/Timestretcher.cpp b/plugins/spu2-x/src/Timestretcher.cpp index 83bccd549c..0b912735c0 100644 --- a/plugins/spu2-x/src/Timestretcher.cpp +++ b/plugins/spu2-x/src/Timestretcher.cpp @@ -458,7 +458,7 @@ static void CvtPacketToFloat( StereoOut32* srcdest ) // Parameter note: Size should always be a multiple of 128, thanks! static void CvtPacketToInt( StereoOut32* srcdest, uint size ) { - //jASSUME( (size & 127) == 0 ); + //pxAssume( (size & 127) == 0 ); const StereoOutFloat* src = (StereoOutFloat*)srcdest; StereoOut32* dest = srcdest; diff --git a/plugins/spu2-x/src/Windows/SndOut_DSound.cpp b/plugins/spu2-x/src/Windows/SndOut_DSound.cpp index a415d0bf4a..a706959e0d 100644 --- a/plugins/spu2-x/src/Windows/SndOut_DSound.cpp +++ b/plugins/spu2-x/src/Windows/SndOut_DSound.cpp @@ -476,7 +476,7 @@ BOOL CALLBACK DSound::ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam BOOL CALLBACK DSound::DSEnumCallback( LPGUID lpGuid, LPCTSTR lpcstrDescription, LPCTSTR lpcstrModule, LPVOID lpContext ) { - jASSUME( DSoundOut != NULL ); + pxAssume( DSoundOut != NULL ); return DS._DSEnumCallback( lpGuid, lpcstrDescription, lpcstrModule, lpContext ); } diff --git a/plugins/spu2-x/src/Windows/SndOut_XAudio2.cpp b/plugins/spu2-x/src/Windows/SndOut_XAudio2.cpp index 523cac20a7..2398b58d85 100644 --- a/plugins/spu2-x/src/Windows/SndOut_XAudio2.cpp +++ b/plugins/spu2-x/src/Windows/SndOut_XAudio2.cpp @@ -241,7 +241,7 @@ public: { HRESULT hr; - jASSUME(pXAudio2 == NULL); + pxAssume(pXAudio2 == NULL); xAudio2DLL = LoadLibraryEx(XAUDIO2_DLL, nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32); if (xAudio2DLL == nullptr) diff --git a/plugins/spu2-x/src/Windows/SndOut_XAudio2_27.cpp b/plugins/spu2-x/src/Windows/SndOut_XAudio2_27.cpp index ba39334128..b79d9fe97e 100644 --- a/plugins/spu2-x/src/Windows/SndOut_XAudio2_27.cpp +++ b/plugins/spu2-x/src/Windows/SndOut_XAudio2_27.cpp @@ -243,7 +243,7 @@ public: { HRESULT hr; - jASSUME( pXAudio2 == NULL ); + 
pxAssume( pXAudio2 == NULL ); // On some systems XAudio2.7 can unload itself and cause PCSX2 to crash. // Maintain an extra library reference so it can't do so. Does not @@ -291,7 +291,7 @@ public: } // Any windows driver should support stereo at the software level, I should think! - jASSUME( deviceDetails.OutputFormat.Format.nChannels > 1 ); + pxAssume( deviceDetails.OutputFormat.Format.nChannels > 1 ); // // Create a mastering voice diff --git a/plugins/spu2-x/src/Windows/SndOut_waveOut.cpp b/plugins/spu2-x/src/Windows/SndOut_waveOut.cpp index 7ae1078956..9bf512b3d5 100644 --- a/plugins/spu2-x/src/Windows/SndOut_waveOut.cpp +++ b/plugins/spu2-x/src/Windows/SndOut_waveOut.cpp @@ -99,7 +99,7 @@ public: speakerConfig = 2; // better not mess with this in wavout :p (rama) // Any windows driver should support stereo at the software level, I should think! - jASSUME( speakerConfig > 1 ); + pxAssume( speakerConfig > 1 ); LPTHREAD_START_ROUTINE threadproc; switch( speakerConfig ) diff --git a/plugins/spu2-x/src/spu2sys.cpp b/plugins/spu2-x/src/spu2sys.cpp index 958d644393..494cf946bc 100644 --- a/plugins/spu2-x/src/spu2sys.cpp +++ b/plugins/spu2-x/src/spu2sys.cpp @@ -57,7 +57,7 @@ __forceinline s16* GetMemPtr(u32 addr) #ifndef DEBUG_FAST // In case you're wondering, this assert is the reason SPU2-X // runs so incrediously slow in Debug mode. :P - jASSUME( addr < 0x100000 ); + pxAssume( addr < 0x100000 ); #endif return (_spu2mem+addr); } @@ -491,7 +491,7 @@ void V_VolumeSlide::RegSet( u16 src ) void V_Core::WriteRegPS1( u32 mem, u16 value ) { - jASSUME( Index == 0 ); // Valid on Core 0 only! + pxAssume( Index == 0 ); // Valid on Core 0 only! bool show = true; u32 reg = mem & 0xffff; @@ -641,7 +641,7 @@ void V_Core::WriteRegPS1( u32 mem, u16 value ) u16 V_Core::ReadRegPS1(u32 mem) { - jASSUME( Index == 0 ); // Valid on Core 0 only! + pxAssume( Index == 0 ); // Valid on Core 0 only! 
bool show=true; u16 value = spu2Ru16(mem); From 6e306ee44f91f3d865c33bb10fc01bc82195456b Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 17 Jul 2016 10:20:57 +0200 Subject: [PATCH 3/3] common: move _xgetbv with cpuid intrin Besides, code comes from Gabest and not reactOS --- common/include/intrin_x86.h | 8 -------- common/include/x86emitter/x86_intrin.h | 7 +++++++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/common/include/intrin_x86.h b/common/include/intrin_x86.h index 320b336470..6e6873207b 100644 --- a/common/include/intrin_x86.h +++ b/common/include/intrin_x86.h @@ -67,11 +67,3 @@ static __inline__ __attribute__((always_inline)) s64 _InterlockedExchange64(vola __sync_synchronize(); return __sync_lock_test_and_set(Target, Value); } - -/*** System information ***/ -static __inline__ __attribute__((always_inline)) unsigned long long _xgetbv(unsigned int index) -{ - unsigned int eax, edx; - __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); - return ((unsigned long long)edx << 32) | eax; -} diff --git a/common/include/x86emitter/x86_intrin.h b/common/include/x86emitter/x86_intrin.h index c0c0a59477..95fa0c98d7 100644 --- a/common/include/x86emitter/x86_intrin.h +++ b/common/include/x86emitter/x86_intrin.h @@ -49,6 +49,13 @@ static __inline__ __attribute__((always_inline)) void cpuid(int CPUInfo[], const __cpuid(InfoType, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); } +static __inline__ __attribute__((always_inline)) unsigned long long _xgetbv(unsigned int index) +{ + unsigned int eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((unsigned long long)edx << 32) | eax; +} + #endif // Rotate instruction