Finish the Linux implementation of memcpy_fast_. I've disabled it by default until I'm sure it's working right, but it can easily be enabled in build.sh. It should be a speed boost on Linux (which Windows already had), but I haven't tested it enough to be able to tell yet.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@643 96395faa-99c1-11dd-bbfe-3dabce05a288
Author: arcum42
Date:   2009-03-01 08:21:14 +00:00
Parent: ad0705de56
Commit: 2270ba4eee

4 changed files with 25 additions and 10 deletions

build.sh

@@ -11,6 +11,10 @@
 #Optimized, but a devbuild
 export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
+#Optimized, but a devbuild - with memcpy_fast_ enabled.
+#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --enable-memcpyfast --prefix `pwd`"
 #Debug / Devbuild version
 #export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"

(memcpy header)

@@ -50,14 +50,15 @@ void _memset16_unaligned( void* dest, u16 data, size_t size );
 extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
 # include "Linux/memzero.h"
+#if defined(LINUX_USE_FAST_MEMORY)
+# define memcpy_fast memcpy_amd_
+# define memcpy_aligned memcpy_amd_
+extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
+#else
 # define memcpy_fast memcpy
 # define memcpy_aligned memcpy
+#endif // LINUX_USE_FAST_MEMORY
-// Currently broken.
-//# define memcpy_fast memcpy_amd_
-//# define memcpy_aligned memcpy_amd_
-// extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
-#endif
-#endif
+#endif // WIN32
+#endif //Header
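For illustration, here is a minimal sketch of how the new guard is meant to resolve at a call site. This is not code from the commit: the __fastcall attribute is dropped and the guard simply restates the macros above so the snippet compiles on its own (linking still needs the assembly routine when the define is set).

#include <cstddef>
#include <cstring>

#if defined(LINUX_USE_FAST_MEMORY)
// Assumed to come from configure when --enable-memcpyfast is passed.
extern "C" void memcpy_amd_(void* dest, const void* src, size_t bytes);
#	define memcpy_fast memcpy_amd_
#else
#	define memcpy_fast memcpy
#endif

void CopyBlock(void* dst, const void* src, size_t len)
{
	// Expands to memcpy_amd_ or plain memcpy at preprocessing time,
	// so call sites never change when the feature is toggled.
	memcpy_fast(dst, src, len);
}

Builds configured without --enable-memcpyfast keep the libc behavior, which is why the commit can ship with the fast path disabled by default.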

configure.ac

@@ -63,6 +63,15 @@ AC_MSG_RESULT($debug)
 AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
+AC_MSG_CHECKING(turn on memcpy_fast_)
+AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
+memcpyfast=$enableval,memcpyfast=no)
+if test "x$memcpyfast" == xyes
+then
+AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
+fi
+AC_MSG_RESULT($memcpyfast)
 dnl Check for dev build
 AC_MSG_CHECKING(for development build)
 AC_ARG_ENABLE(devbuild, AC_HELP_STRING([--enable-devbuild], [Special Build for developers that simplifies testing and adds extra checks]),
@@ -138,3 +147,4 @@ echo " Force sse3? $sse3"
 echo " nls support? $nls"
 echo " local plugin inis? $localinis"
 echo " custom cflags? $customcflags"
+echo " memcpy_fast? $memcpyfast"

(memcpy_amd_ assembly source)

@@ -359,7 +359,7 @@ memcpy_amd_:
 $memcpy_do_align:
 	mov %eax, 8	// a trick that's faster than rep movsb...
 	sub %eax, %edi	// align destination to qword
-	and %eax, 0x111b	// get the low bits
+	andb %eax, 111	// get the low bits
 	sub %ecx, %eax	// update copy count
 	neg %eax	// set up to jump into the array
 	add %eax, offset $memcpy_align_done
@@ -427,7 +427,7 @@ $memcpy_ic_2:
 	mov %eax, %ecx	// has valid low 6 bits of the byte count
 $memcpy_ic_3:
 	shr %eax, 2	// dword count
-	and %eax, 0x1111b	// only look at the "remainder" bits
+	andb %eax, 1111	// only look at the "remainder" bits
 	neg %eax	// set up to jump into the array
 	add %eax, offset $memcpy_last_few
 	jmp %eax	// jump to array of movsd's
@@ -512,7 +512,7 @@ $memcpy_uc_1:	// 64-byte blocks, uncached copy
 $memcpy_last_few:	// dword aligned from before movsd's
 	mov %eax, %ecx	// has valid low 2 bits of the byte count
-	and %eax, 0x11b	// the last few cows must come home
+	andb %eax, 11	// the last few cows must come home
 	jz $memcpy_final	// no more, let's leave
 	rep movsb	// the last 1, 2, or 3 bytes
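The three masks above are the crux of the fix. The routine needs binary 111, 1111, and 11 (that is, 7, 15, and 3) to pick out the bytes required to qword-align the destination, the leftover dwords after the 64-byte blocks, and the final 1 to 3 stray bytes. The old literals kept MASM's trailing b behind a 0x prefix, so they parsed as hex: 0x111B is 4379, 0x1111B is 69915, and 0x11B is 283, which drove the computed jump offsets far outside the movsb/movsd tables. A small C++ sketch of the same remainder arithmetic, with hypothetical names of my own, for sanity-checking the masks:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main()
{
	// Hypothetical inputs; any address and size work the same way.
	std::uintptr_t dest  = 0x1003;
	std::size_t    bytes = 200;

	// "mov eax, 8 / sub eax, edi / and eax, 111b": bytes needed to
	// qword-align the destination (low 3 bits only).
	std::size_t align_bytes = (8 - dest) & 7;
	std::size_t rest        = bytes - align_bytes;

	// "shr eax, 2 / and eax, 1111b": remainder dwords, i.e. bits 2-5
	// of the byte count left over from the 64-byte block loop.
	std::size_t tail_dwords = (rest >> 2) & 15;

	// "and eax, 11b" before rep movsb: the final 1-3 stray bytes.
	std::size_t tail_bytes  = rest & 3;

	std::printf("align=%zu dwords=%zu bytes=%zu\n",
	            align_bytes, tail_dwords, tail_bytes);
	return 0;
}

With the hex literals, the first mask alone evaluated to 4379 instead of 7, so the sub/neg/add sequence that follows it could never land on a valid entry in the jump table.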