Linux: Fix bugs in _aligned_realloc and newVif's inlined SSE HashBucket finder.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2395 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-12-24 10:04:03 +00:00
parent 4b0b270776
commit 3d9bb25505
3 changed files with 390 additions and 375 deletions

View File

@ -28,7 +28,7 @@ static const uint headsize = sizeof(AlignedMallocHeader);
void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align) void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
{ {
jASSUME( align < 0x10000 ); pxAssume( align < 0x10000 );
u8* p = (u8*)malloc(size+align+headsize); u8* p = (u8*)malloc(size+align+headsize);
@ -47,15 +47,16 @@ void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align) void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
{ {
if( handle == NULL ) return NULL; pxAssume( align < 0x10000 );
jASSUME( align < 0x10000 );
AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
void* newbuf = pcsx2_aligned_malloc( size, align ); void* newbuf = pcsx2_aligned_malloc( size, align );
memcpy_fast( newbuf, handle, std::min( size, header->size ) );
free( header->baseptr ); if( handle != NULL )
{
AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
memcpy_fast( newbuf, handle, std::min( size, header->size ) );
free( header->baseptr );
}
return newbuf; return newbuf;
} }
@ -74,7 +75,7 @@ __forceinline void pcsx2_aligned_free(void* pmem)
// memzero_obj and stuff). // memzero_obj and stuff).
__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size ) __forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
{ {
jASSUME( (size & 0x1) == 0 ); pxAssume( (size & 0x1) == 0 );
u16* dst = (u16*)dest; u16* dst = (u16*)dest;
for(int i=size; i; --i, ++dst ) for(int i=size; i; --i, ++dst )

View File

@ -16,6 +16,20 @@
#include "xmmintrin.h" #include "xmmintrin.h"
#pragma once #pragma once
// Compiler-specific cast targets for reinterpreting one SSE vector type as another,
// e.g. handing the __m128i result of an integer compare to an intrinsic that takes
// a __m128.  Usage: (cast_m128) expr, (cast_m128i) expr, (cast_m128d) expr.
//
//  * MSVC (_MSC_VER defined): the macros expand to reference types (__m128& etc.),
//    because MSVC reports an error for the plain by-value typecast.
//  * All other compilers (GCC, and presumably the rest): the macros expand to the
//    plain vector types, because those compilers report an error when the
//    reference form is used.
//
// Neither restriction is obviously justified, since both typecasts look like they
// should be valid; the macros simply hide the difference.  --air
//
// NOTE(review): the SSE2 header also offers dedicated reinterpreting intrinsics
// (_mm_castsi128_ps, _mm_castps_si128, ...); presumably those compile on both
// toolchains and would make these macros unnecessary -- confirm before switching.
#ifdef _MSC_VER
# define cast_m128 __m128&
# define cast_m128i __m128i&
# define cast_m128d __m128d&
#else // defined(__GNUC__)
# define cast_m128 __m128
# define cast_m128i __m128i
# define cast_m128d __m128d
#endif
template< typename T > template< typename T >
struct SizeChain struct SizeChain
{ {
@ -54,7 +68,7 @@ public:
for (int i=bucket.Size; i; --i) { for (int i=bucket.Size; i; --i) {
// This inline version seems about 1-2% faster in tests of games that average 1 // This inline version seems about 1-2% faster in tests of games that average 1
// program per bucket. Games that average more should see a bigger improvement --air // program per bucket. Games that average more should see a bigger improvement --air
int result = _mm_movemask_ps( (__m128&) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7; int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
if( result == 0x7 ) return &bucket.Chain[i]; if( result == 0x7 ) return &bucket.Chain[i];
// Dynamically generated function version, can't be inlined. :( // Dynamically generated function version, can't be inlined. :(