Linux: memcpy_fast seems stable enough on Linux, so I'm removing the --enable-memcpyfast configure switch and turning memcpy_fast on by default.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@759 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2009-03-13 01:39:54 +00:00
parent 93529360a2
commit 71c4561f86
6 changed files with 28 additions and 71 deletions

View File

@ -9,14 +9,11 @@
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --prefix `pwd`"
#Optimized, but a devbuild
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
#Debug / Devbuild version
#export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"
#Optimized, but a devbuild - with memcpy_fast_ enabled. - EXPERIMENTAL
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --enable-memcpyfast --prefix `pwd`"
#ZeroGS Normal mode
export ZEROGSOPTIONS="--enable-sse2"

View File

@ -153,7 +153,7 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) {
psHu32(0x1000F240) &= ~0x40;
psHu32(0x1000F240) &= ~0x100;
psHu32(0x1000F240) &= ~0x4000;
FreezeXMMRegs(0)
FreezeXMMRegs(0);
}
}

View File

@ -19,46 +19,29 @@
#ifndef __MEMCPY_FAST_H__
#define __MEMCPY_FAST_H__
void _memset16_unaligned( void* dest, u16 data, size_t size );
#if defined(_WIN32)
#include "windows/memzero.h"
#else
#include "Linux/memzero.h"
#endif // WIN32
#if defined(_WIN32) && !defined(__x86_64__)
void _memset16_unaligned( void* dest, u16 data, size_t size );
// The new simplified memcpy_amd_ is now faster than memcpy_raz_.
// memcpy_amd_ also does mmx register saving, negating the need for freezeregs (code cleanup!)
// Additionally, using one single memcpy implementation keeps the code cache cleaner.
//extern void __fastcall memcpy_raz_udst(void *dest, const void *src, size_t bytes);
//extern void __fastcall memcpy_raz_usrc(void *dest, const void *src, size_t bytes);
//extern void __fastcall memcpy_raz_(void *dest, const void *src, size_t bytes);
#ifdef __LINUX__
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
#else
extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern void memxor_mmx(void* dst, const void* src1, int cmpsize);
#endif
# include "windows/memzero.h"
# define memcpy_fast memcpy_amd_
# define memcpy_aligned memcpy_amd_
#else
// for now linux uses the GCC memcpy/memset implementations.
//#define memcpy_raz_udst memcpy
//#define memcpy_raz_usrc memcpy
//#define memcpy_raz_ memcpy
// fast_routines.S
extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
# include "Linux/memzero.h"
#if defined(LINUX_USE_FAST_MEMORY)
# define memcpy_fast memcpy_amd_
# define memcpy_aligned memcpy_amd_
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
#else
# define memcpy_fast memcpy
# define memcpy_aligned memcpy
#endif // LINUX_USE_FAST_MEMORY
#endif // WIN32
#define memcpy_fast memcpy_amd_
#define memcpy_aligned memcpy_amd_
#endif //Header

View File

@ -63,14 +63,14 @@ AC_MSG_RESULT($debug)
AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
AC_MSG_CHECKING(turn on memcpy_fast_)
AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
memcpyfast=$enableval,memcpyfast=no)
if test "x$memcpyfast" == xyes
then
AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
fi
AC_MSG_RESULT($memcpyfast)
#AC_MSG_CHECKING(turn on memcpy_fast_)
#AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
#memcpyfast=$enableval,memcpyfast=no)
#if test "x$memcpyfast" == xyes
#then
# AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
#fi
#AC_MSG_RESULT($memcpyfast)
#AC_MSG_CHECKING(turn on microVU)
#AC_ARG_ENABLE(microVU, AC_HELP_STRING([--enable-microVU], [Turns on the currently incomplete microVU files - Not a good idea]),

View File

@ -698,19 +698,10 @@ void* SuperVUGetProgram(u32 startpc, int vuindex)
bool VuFunctionHeader::IsSame(void* pmem)
{
#ifdef SUPERVU_CACHING
//u32 checksum[2];
vector<RANGE>::iterator it;
FORIT(it, ranges) {
//memxor_mmx(checksum, (u8*)pmem+it->start, it->size);
//if( checksum[0] != it->checksum[0] || checksum[1] != it->checksum[1] )
// return false;
// memcmp_mmx doesn't work on x86-64 machines
// and neither does pcsx2.
//#if defined(_MSC_VER)
if( memcmp_mmx((u8*)pmem+it->start, it->pmem, it->size) )
//#else
// if( memcmp((u8*)pmem+it->start, it->pmem, it->size) )
//#endif
FORIT(it, ranges)
{
if( memcmp_mmx((u8*)pmem+it->start, it->pmem, it->size) )
return false;
}
#endif

View File

@ -486,19 +486,6 @@ void recADDIU( void )
////////////////////////////////////////////////////
void recDADDI( void )
{
#ifdef __x86_64_
if ( ! _Rt_ )
{
return;
}
MOV64MtoR( RAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
if ( _Imm_ != 0 )
{
ADD64ItoR( EAX, _Imm_ );
}
MOV64RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX );
#else
if ( ! _Rt_ )
{
return;
@ -520,7 +507,6 @@ void recDADDI( void )
}
MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX );
#endif
}
////////////////////////////////////////////////////