Linux: Fix bugs in _aligned_realloc and newVif's inlined SSE HashBucket finder.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2395 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-12-24 10:04:03 +00:00
parent 4b0b270776
commit 3d9bb25505
3 changed files with 390 additions and 375 deletions

View File

@ -28,7 +28,7 @@ static const uint headsize = sizeof(AlignedMallocHeader);
void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
{
jASSUME( align < 0x10000 );
pxAssume( align < 0x10000 );
u8* p = (u8*)malloc(size+align+headsize);
@ -47,15 +47,16 @@ void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
// Resizes a block previously obtained from pcsx2_aligned_malloc.
//
// A new aligned buffer of `size` bytes is always allocated through
// pcsx2_aligned_malloc. When `handle` is non-NULL, min(size, old size) bytes
// are copied into the new buffer and the old underlying allocation is freed.
// When `handle` is NULL nothing is copied or freed, so the call behaves like a
// plain aligned allocation (the same convention as realloc(NULL, size)).
//
// Parameters:
//   handle - aligned pointer returned by an earlier aligned allocation, or NULL.
//   size   - requested size of the new buffer, in bytes.
//   align  - requested alignment; assumed to be below 0x10000.
//
// Returns the pointer produced by pcsx2_aligned_malloc for the new buffer.
void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
{
	pxAssume( align < 0x10000 );

	void* newbuf = pcsx2_aligned_malloc( size, align );

	if( handle != NULL )
	{
		// The bookkeeping header is stored directly below the aligned pointer
		// that was handed out to the caller.
		AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);

		// Never copy more than the smaller of the old and new sizes.
		memcpy_fast( newbuf, handle, std::min( size, header->size ) );

		// Release the original allocation exactly once, via its base pointer.
		free( header->baseptr );
	}

	return newbuf;
}
@ -74,7 +75,7 @@ __forceinline void pcsx2_aligned_free(void* pmem)
// memzero_obj and stuff).
__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
{
jASSUME( (size & 0x1) == 0 );
pxAssume( (size & 0x1) == 0 );
u16* dst = (u16*)dest;
for(int i=size; i; --i, ++dst )

View File

@ -16,6 +16,20 @@
#include "xmmintrin.h"
#pragma once
// Typecast helpers for SIMD operations, used to hand the result of one SSE
// intrinsic to another intrinsic that expects a different __m128 vector type.
// For some reason MSVC needs a handle/reference typecast to avoid an error, while
// GCC (and presumably other compilers) generate an error if the handle/ref form is
// used. Honestly neither makes sense, since both typecasts should be perfectly
// valid >_<. --air
// NOTE(review): the _mm_cast* intrinsics (e.g. _mm_castsi128_ps) look like a
// compiler-independent way to express the same reinterpretation -- confirm they
// are available on all supported toolchains before replacing these macros.
#ifdef _MSC_VER
# define cast_m128 __m128&
# define cast_m128i __m128i&
# define cast_m128d __m128d&
#else // defined(__GNUC__)
# define cast_m128 __m128
# define cast_m128i __m128i
# define cast_m128d __m128d
#endif
template< typename T >
struct SizeChain
{
@ -54,7 +68,7 @@ public:
for (int i=bucket.Size; i; --i) {
// This inline version seems about 1-2% faster in tests of games that average 1
// program per bucket. Games that average more should see a bigger improvement --air
int result = _mm_movemask_ps( (__m128&) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
if( result == 0x7 ) return &bucket.Chain[i];
// Dynamically generated function version, can't be inlined. :(