Linux: Fix bugs in _aligned_realloc and newVif's inlined SSE HashBucket finder.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2395 96395faa-99c1-11dd-bbfe-3dabce05a288
2009-12-24 10:04:03 +00:00 · 2009-12-24 10:04:03 +00:00 · 3d9bb25505
parent 4b0b270776
commit 3d9bb25505
3 changed files with 390 additions and 375 deletions
--- a/common/src/Utilities/AlignedMalloc.cpp
+++ b/common/src/Utilities/AlignedMalloc.cpp
@@ -28,7 +28,7 @@ static const uint headsize = sizeof(AlignedMallocHeader);

 void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
 {
-	jASSUME( align < 0x10000 );
+	pxAssume( align < 0x10000 );

 	u8* p = (u8*)malloc(size+align+headsize);

@@ -47,15 +47,16 @@ void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)

 void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
 {
-	if( handle == NULL ) return NULL;
-	jASSUME( align < 0x10000 );
-
-	AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
+	pxAssume( align < 0x10000 );

 	void* newbuf = pcsx2_aligned_malloc( size, align );
-	memcpy_fast( newbuf, handle, std::min( size, header->size ) );

-	free( header->baseptr );
+	if( handle != NULL )
+	{
+		AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
+		memcpy_fast( newbuf, handle, std::min( size, header->size ) );
+		free( header->baseptr );
+	}
 	return newbuf;
 }

@@ -74,7 +75,7 @@ __forceinline void pcsx2_aligned_free(void* pmem)
 // memzero_obj and stuff).
 __forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
 {
-	jASSUME( (size & 0x1) == 0 );
+	pxAssume( (size & 0x1) == 0 );

 	u16* dst = (u16*)dest;
 	for(int i=size; i; --i, ++dst )
--- a/pcsx2/x86/newVif_HashBucket.h
+++ b/pcsx2/x86/newVif_HashBucket.h
@@ -16,6 +16,20 @@
 #include "xmmintrin.h"
 #pragma once

+// Create some typecast operators for SIMD operations.  For some reason MSVC needs a
+// handle/reference typecast to avoid error.  GCC (and presumably other compilers)
+// generate an error if the handle/ref is used.  Honestly neither makes sense, since
+// both typecasts should be perfectly valid >_<.  --air
+#ifdef _MSC_VER
+#	define cast_m128		__m128&
+#	define cast_m128i		__m128i&
+#	define cast_m128d		__m128d&
+#else // defined(__GNUC__)
+#	define cast_m128		__m128
+#	define cast_m128i		__m128i
+#	define cast_m128d		__m128d
+#endif
+
 template< typename T >
 struct SizeChain
 {
@@ -54,7 +68,7 @@ public:
 		for (int i=bucket.Size; i; --i) {
 			// This inline version seems about 1-2% faster in tests of games that average 1
 			// program per bucket.  Games that average more should see a bigger improvement --air
-			int result = _mm_movemask_ps( (__m128&) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
+			int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
 			if( result == 0x7 ) return &bucket.Chain[i];

 			// Dynamically generated function version, can't be inlined. :(