mirror of https://github.com/PCSX2/pcsx2.git
Linux: Fix bugs in _aligned_realloc and newVif's inlined SSE HashBucket finder.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2395 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
4b0b270776
commit
3d9bb25505
|
@ -28,7 +28,7 @@ static const uint headsize = sizeof(AlignedMallocHeader);
|
||||||
|
|
||||||
void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
|
void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
|
||||||
{
|
{
|
||||||
jASSUME( align < 0x10000 );
|
pxAssume( align < 0x10000 );
|
||||||
|
|
||||||
u8* p = (u8*)malloc(size+align+headsize);
|
u8* p = (u8*)malloc(size+align+headsize);
|
||||||
|
|
||||||
|
@ -47,15 +47,16 @@ void* __fastcall pcsx2_aligned_malloc(size_t size, size_t align)
|
||||||
|
|
||||||
void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
|
void* __fastcall pcsx2_aligned_realloc(void* handle, size_t size, size_t align)
|
||||||
{
|
{
|
||||||
if( handle == NULL ) return NULL;
|
pxAssume( align < 0x10000 );
|
||||||
jASSUME( align < 0x10000 );
|
|
||||||
|
|
||||||
AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
|
|
||||||
|
|
||||||
void* newbuf = pcsx2_aligned_malloc( size, align );
|
void* newbuf = pcsx2_aligned_malloc( size, align );
|
||||||
memcpy_fast( newbuf, handle, std::min( size, header->size ) );
|
|
||||||
|
|
||||||
free( header->baseptr );
|
if( handle != NULL )
|
||||||
|
{
|
||||||
|
AlignedMallocHeader* header = (AlignedMallocHeader*)((uptr)handle - headsize);
|
||||||
|
memcpy_fast( newbuf, handle, std::min( size, header->size ) );
|
||||||
|
free( header->baseptr );
|
||||||
|
}
|
||||||
return newbuf;
|
return newbuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +75,7 @@ __forceinline void pcsx2_aligned_free(void* pmem)
|
||||||
// memzero_obj and stuff).
|
// memzero_obj and stuff).
|
||||||
__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
|
__forceinline void _memset16_unaligned( void* dest, u16 data, size_t size )
|
||||||
{
|
{
|
||||||
jASSUME( (size & 0x1) == 0 );
|
pxAssume( (size & 0x1) == 0 );
|
||||||
|
|
||||||
u16* dst = (u16*)dest;
|
u16* dst = (u16*)dest;
|
||||||
for(int i=size; i; --i, ++dst )
|
for(int i=size; i; --i, ++dst )
|
||||||
|
|
|
@ -16,6 +16,20 @@
|
||||||
#include "xmmintrin.h"
|
#include "xmmintrin.h"
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
// Create some typecast operators for SIMD operations. For some reason MSVC needs a
|
||||||
|
// handle/reference typecast to avoid error. GCC (and presumably other compilers)
|
||||||
|
// generate an error if the handle/ref is used. Honestly neither makes sense, since
|
||||||
|
// both typecasts should be perfectly valid >_<. --air
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
# define cast_m128 __m128&
|
||||||
|
# define cast_m128i __m128i&
|
||||||
|
# define cast_m128d __m128d&
|
||||||
|
#else // defined(__GNUC__)
|
||||||
|
# define cast_m128 __m128
|
||||||
|
# define cast_m128i __m128i
|
||||||
|
# define cast_m128d __m128d
|
||||||
|
#endif
|
||||||
|
|
||||||
template< typename T >
|
template< typename T >
|
||||||
struct SizeChain
|
struct SizeChain
|
||||||
{
|
{
|
||||||
|
@ -54,7 +68,7 @@ public:
|
||||||
for (int i=bucket.Size; i; --i) {
|
for (int i=bucket.Size; i; --i) {
|
||||||
// This inline version seems about 1-2% faster in tests of games that average 1
|
// This inline version seems about 1-2% faster in tests of games that average 1
|
||||||
// program per bucket. Games that average more should see a bigger improvement --air
|
// program per bucket. Games that average more should see a bigger improvement --air
|
||||||
int result = _mm_movemask_ps( (__m128&) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
||||||
if( result == 0x7 ) return &bucket.Chain[i];
|
if( result == 0x7 ) return &bucket.Chain[i];
|
||||||
|
|
||||||
// Dynamically generated function version, can't be inlined. :(
|
// Dynamically generated function version, can't be inlined. :(
|
||||||
|
|
Loading…
Reference in New Issue