mirror of https://github.com/PCSX2/pcsx2.git
Linux: Fix bugs in _aligned_realloc and newVif's inlined SSE HashBucket finder.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2395 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
4b0b270776
commit
3d9bb25505
|
@ -28,7 +28,7 @@ static const uint headsize = sizeof(AlignedMallocHeader);
|
|||
|
||||
void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
|
||||
{
|
||||
jASSUME( align < 0x10000 );
|
||||
pxAssume( align < 0x10000 );
|
||||
|
||||
u8* p = (u8*)malloc(size+align+headsize);
|
||||
|
||||
|
@ -47,15 +47,16 @@ void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
|
|||
|
||||
void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
|
||||
{
|
||||
if( handle == NULL ) return NULL;
|
||||
jASSUME( align < 0x10000 );
|
||||
|
||||
AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
|
||||
pxAssume( align < 0x10000 );
|
||||
|
||||
void* newbuf = pcsx2_aligned_malloc( size, align );
|
||||
memcpy_fast( newbuf, handle, std::min( size, header->size ) );
|
||||
|
||||
free( header->baseptr );
|
||||
if( handle != NULL )
|
||||
{
|
||||
AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
|
||||
memcpy_fast( newbuf, handle, std::min( size, header->size ) );
|
||||
free( header->baseptr );
|
||||
}
|
||||
return newbuf;
|
||||
}
|
||||
|
||||
|
@ -74,7 +75,7 @@ __forceinline void pcsx2_aligned_free(void* pmem)
|
|||
// memzero_obj and stuff).
|
||||
__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
|
||||
{
|
||||
jASSUME( (size & 0x1) == 0 );
|
||||
pxAssume( (size & 0x1) == 0 );
|
||||
|
||||
u16* dst = (u16*)dest;
|
||||
for(int i=size; i; --i, ++dst )
|
||||
|
|
|
@ -16,6 +16,20 @@
|
|||
#include "xmmintrin.h"
|
||||
#pragma once
|
||||
|
||||
// Create some typecast operators for SIMD operations. For some reason MSVC needs a
|
||||
// handle/reference typecast to avoid error. GCC (and presumably other compilers)
|
||||
// generate an error if the handle/ref is used. Honestly neither makes sense, since
|
||||
// both typecasts should be perfectly valid >_<. --air
|
||||
#ifdef _MSC_VER
|
||||
# define cast_m128 __m128&
|
||||
# define cast_m128i __m128i&
|
||||
# define cast_m128d __m128d&
|
||||
#else // defined(__GNUC__)
|
||||
# define cast_m128 __m128
|
||||
# define cast_m128i __m128i
|
||||
# define cast_m128d __m128d
|
||||
#endif
|
||||
|
||||
template< typename T >
|
||||
struct SizeChain
|
||||
{
|
||||
|
@ -54,7 +68,7 @@ public:
|
|||
for (int i=bucket.Size; i; --i) {
|
||||
// This inline version seems about 1-2% faster in tests of games that average 1
|
||||
// program per bucket. Games that average more should see a bigger improvement --air
|
||||
int result = _mm_movemask_ps( (__m128&) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
||||
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
||||
if( result == 0x7 ) return &bucket.Chain[i];
|
||||
|
||||
// Dynamically generated function version, can't be inlined. :(
|
||||
|
|
Loading…
Reference in New Issue