mirror of https://github.com/PCSX2/pcsx2.git
Linux: memcpy_fast seems stable enough on Linux now, so I'm removing the configure switch and turning it on by default.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@759 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
93529360a2
commit
71c4561f86
5
build.sh
5
build.sh
|
@@ -9,14 +9,11 @@
|
||||||
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --prefix `pwd`"
|
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --prefix `pwd`"
|
||||||
|
|
||||||
#Optimized, but a devbuild
|
#Optimized, but a devbuild
|
||||||
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
|
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
|
||||||
|
|
||||||
#Debug / Devbuild version
|
#Debug / Devbuild version
|
||||||
#export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"
|
#export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"
|
||||||
|
|
||||||
#Optimized, but a devbuild - with memcpy_fast_ enabled. - EXPERIMENTAL
|
|
||||||
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --enable-memcpyfast --prefix `pwd`"
|
|
||||||
|
|
||||||
#ZeroGS Normal mode
|
#ZeroGS Normal mode
|
||||||
export ZEROGSOPTIONS="--enable-sse2"
|
export ZEROGSOPTIONS="--enable-sse2"
|
||||||
|
|
||||||
|
|
|
@@ -153,7 +153,7 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) {
|
||||||
psHu32(0x1000F240) &= ~0x40;
|
psHu32(0x1000F240) &= ~0x40;
|
||||||
psHu32(0x1000F240) &= ~0x100;
|
psHu32(0x1000F240) &= ~0x100;
|
||||||
psHu32(0x1000F240) &= ~0x4000;
|
psHu32(0x1000F240) &= ~0x4000;
|
||||||
FreezeXMMRegs(0)
|
FreezeXMMRegs(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -19,46 +19,29 @@
|
||||||
#ifndef __MEMCPY_FAST_H__
|
#ifndef __MEMCPY_FAST_H__
|
||||||
#define __MEMCPY_FAST_H__
|
#define __MEMCPY_FAST_H__
|
||||||
|
|
||||||
void _memset16_unaligned( void* dest, u16 data, size_t size );
|
#if defined(_WIN32)
|
||||||
|
#include "windows/memzero.h"
|
||||||
|
#else
|
||||||
|
#include "Linux/memzero.h"
|
||||||
|
#endif // WIN32
|
||||||
|
|
||||||
#if defined(_WIN32) && !defined(__x86_64__)
|
void _memset16_unaligned( void* dest, u16 data, size_t size );
|
||||||
|
|
||||||
// The new simplified memcpy_amd_ is now faster than memcpy_raz_.
|
// The new simplified memcpy_amd_ is now faster than memcpy_raz_.
|
||||||
// memcpy_amd_ also does mmx register saving, negating the need for freezeregs (code cleanup!)
|
// memcpy_amd_ also does mmx register saving, negating the need for freezeregs (code cleanup!)
|
||||||
// Additionally, using one single memcpy implementation keeps the code cache cleaner.
|
// Additionally, using one single memcpy implementation keeps the code cache cleaner.
|
||||||
|
|
||||||
//extern void __fastcall memcpy_raz_udst(void *dest, const void *src, size_t bytes);
|
#ifdef __LINUX__
|
||||||
//extern void __fastcall memcpy_raz_usrc(void *dest, const void *src, size_t bytes);
|
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
||||||
//extern void __fastcall memcpy_raz_(void *dest, const void *src, size_t bytes);
|
extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
|
||||||
|
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
||||||
|
#else
|
||||||
extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
||||||
extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
|
extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
|
||||||
extern void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
extern void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
||||||
|
#endif
|
||||||
|
|
||||||
# include "windows/memzero.h"
|
#define memcpy_fast memcpy_amd_
|
||||||
# define memcpy_fast memcpy_amd_
|
#define memcpy_aligned memcpy_amd_
|
||||||
# define memcpy_aligned memcpy_amd_
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
// for now linux uses the GCC memcpy/memset implementations.
|
|
||||||
//#define memcpy_raz_udst memcpy
|
|
||||||
//#define memcpy_raz_usrc memcpy
|
|
||||||
//#define memcpy_raz_ memcpy
|
|
||||||
|
|
||||||
// fast_routines.S
|
|
||||||
extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
|
|
||||||
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
|
||||||
|
|
||||||
# include "Linux/memzero.h"
|
|
||||||
#if defined(LINUX_USE_FAST_MEMORY)
|
|
||||||
# define memcpy_fast memcpy_amd_
|
|
||||||
# define memcpy_aligned memcpy_amd_
|
|
||||||
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
|
||||||
#else
|
|
||||||
# define memcpy_fast memcpy
|
|
||||||
# define memcpy_aligned memcpy
|
|
||||||
#endif // LINUX_USE_FAST_MEMORY
|
|
||||||
|
|
||||||
#endif // WIN32
|
|
||||||
|
|
||||||
#endif //Header
|
#endif //Header
|
||||||
|
|
|
@@ -63,14 +63,14 @@ AC_MSG_RESULT($debug)
|
||||||
|
|
||||||
AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
|
AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
|
||||||
|
|
||||||
AC_MSG_CHECKING(turn on memcpy_fast_)
|
#AC_MSG_CHECKING(turn on memcpy_fast_)
|
||||||
AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
|
#AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
|
||||||
memcpyfast=$enableval,memcpyfast=no)
|
#memcpyfast=$enableval,memcpyfast=no)
|
||||||
if test "x$memcpyfast" == xyes
|
#if test "x$memcpyfast" == xyes
|
||||||
then
|
#then
|
||||||
AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
|
# AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
|
||||||
fi
|
#fi
|
||||||
AC_MSG_RESULT($memcpyfast)
|
#AC_MSG_RESULT($memcpyfast)
|
||||||
|
|
||||||
#AC_MSG_CHECKING(turn on microVU)
|
#AC_MSG_CHECKING(turn on microVU)
|
||||||
#AC_ARG_ENABLE(microVU, AC_HELP_STRING([--enable-microVU], [Turns on the currently incomplete microVU files - Not a good idea]),
|
#AC_ARG_ENABLE(microVU, AC_HELP_STRING([--enable-microVU], [Turns on the currently incomplete microVU files - Not a good idea]),
|
||||||
|
|
|
@@ -698,19 +698,10 @@ void* SuperVUGetProgram(u32 startpc, int vuindex)
|
||||||
bool VuFunctionHeader::IsSame(void* pmem)
|
bool VuFunctionHeader::IsSame(void* pmem)
|
||||||
{
|
{
|
||||||
#ifdef SUPERVU_CACHING
|
#ifdef SUPERVU_CACHING
|
||||||
//u32 checksum[2];
|
|
||||||
vector<RANGE>::iterator it;
|
vector<RANGE>::iterator it;
|
||||||
FORIT(it, ranges) {
|
FORIT(it, ranges)
|
||||||
//memxor_mmx(checksum, (u8*)pmem+it->start, it->size);
|
{
|
||||||
//if( checksum[0] != it->checksum[0] || checksum[1] != it->checksum[1] )
|
if( memcmp_mmx((u8*)pmem+it->start, it->pmem, it->size) )
|
||||||
// return false;
|
|
||||||
// memcmp_mmx doesn't work on x86-64 machines
|
|
||||||
// and neither does pcsx2.
|
|
||||||
//#if defined(_MSC_VER)
|
|
||||||
if( memcmp_mmx((u8*)pmem+it->start, it->pmem, it->size) )
|
|
||||||
//#else
|
|
||||||
// if( memcmp((u8*)pmem+it->start, it->pmem, it->size) )
|
|
||||||
//#endif
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@@ -486,19 +486,6 @@ void recADDIU( void )
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
void recDADDI( void )
|
void recDADDI( void )
|
||||||
{
|
{
|
||||||
#ifdef __x86_64_
|
|
||||||
if ( ! _Rt_ )
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
MOV64MtoR( RAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] );
|
|
||||||
if ( _Imm_ != 0 )
|
|
||||||
{
|
|
||||||
ADD64ItoR( EAX, _Imm_ );
|
|
||||||
}
|
|
||||||
MOV64RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX );
|
|
||||||
#else
|
|
||||||
if ( ! _Rt_ )
|
if ( ! _Rt_ )
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
|
@@ -520,7 +507,6 @@ void recDADDI( void )
|
||||||
}
|
}
|
||||||
MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
|
MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
|
||||||
MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX );
|
MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX );
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
|
|
Loading…
Reference in New Issue