mirror of https://github.com/PCSX2/pcsx2.git
Finish the Linux implementation of memcpy_fast_. It is disabled by default until I'm sure it works correctly, but it can easily be enabled in build.sh. It should give Linux the same speed boost that Windows already has, but I haven't tested it enough to tell yet.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@643 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ad0705de56
commit
2270ba4eee
4
build.sh
4
build.sh
|
@ -11,6 +11,10 @@
|
|||
#Optimized, but a devbuild
|
||||
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
|
||||
|
||||
|
||||
#Optimized, but a devbuild - with memcpy_fast_ enabled.
|
||||
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --enable-memcpyfast --prefix `pwd`"
|
||||
|
||||
#Debug / Devbuild version
|
||||
#export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"
|
||||
|
||||
|
|
|
@ -50,14 +50,15 @@ void _memset16_unaligned( void* dest, u16 data, size_t size );
|
|||
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
||||
|
||||
# include "Linux/memzero.h"
|
||||
#if defined(LINUX_USE_FAST_MEMORY)
|
||||
# define memcpy_fast memcpy_amd_
|
||||
# define memcpy_aligned memcpy_amd_
|
||||
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
||||
#else
|
||||
# define memcpy_fast memcpy
|
||||
# define memcpy_aligned memcpy
|
||||
#endif // LINUX_USE_FAST_MEMORY
|
||||
|
||||
// Currently broken.
|
||||
//# define memcpy_fast memcpy_amd_
|
||||
//# define memcpy_aligned memcpy_amd_
|
||||
// extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
||||
#endif // WIN32
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif //Header
|
||||
|
|
|
@ -63,6 +63,15 @@ AC_MSG_RESULT($debug)
|
|||
|
||||
AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
|
||||
|
||||
AC_MSG_CHECKING(turn on memcpy_fast_)
|
||||
AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
|
||||
memcpyfast=$enableval,memcpyfast=no)
|
||||
if test "x$memcpyfast" == xyes
|
||||
then
|
||||
AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
|
||||
fi
|
||||
AC_MSG_RESULT($memcpyfast)
|
||||
|
||||
dnl Check for dev build
|
||||
AC_MSG_CHECKING(for development build)
|
||||
AC_ARG_ENABLE(devbuild, AC_HELP_STRING([--enable-devbuild], [Special Build for developers that simplifies testing and adds extra checks]),
|
||||
|
@ -138,3 +147,4 @@ echo " Force sse3? $sse3"
|
|||
echo " nls support? $nls"
|
||||
echo " local plugin inis? $localinis"
|
||||
echo " custom cflags? $customcflags"
|
||||
echo " memcpy_fast? $memcpyfast"
|
||||
|
|
|
@ -359,7 +359,7 @@ memcpy_amd_:
|
|||
$memcpy_do_align:
|
||||
mov %eax, 8 // a trick that's faster than rep movsb...
|
||||
sub %eax, %edi // align destination to qword
|
||||
and %eax, 0x111b // get the low bits
|
||||
andb %eax, 111 // get the low bits
|
||||
sub %ecx, %eax // update copy count
|
||||
neg %eax // set up to jump into the array
|
||||
add %eax, offset $memcpy_align_done
|
||||
|
@ -427,7 +427,7 @@ $memcpy_ic_2:
|
|||
mov %eax, %ecx // has valid low 6 bits of the byte count
|
||||
$memcpy_ic_3:
|
||||
shr %eax, 2 // dword count
|
||||
and %eax, 0x1111b // only look at the "remainder" bits
|
||||
andb %eax, 1111 // only look at the "remainder" bits
|
||||
neg %eax // set up to jump into the array
|
||||
add %eax, offset $memcpy_last_few
|
||||
jmp %eax // jump to array of movsd's
|
||||
|
@ -512,7 +512,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
|
|||
|
||||
$memcpy_last_few: // dword aligned from before movsd's
|
||||
mov %eax, %ecx // has valid low 2 bits of the byte count
|
||||
and %eax, 0x11b // the last few cows must come home
|
||||
andb %eax, 11 // the last few cows must come home
|
||||
jz $memcpy_final // no more, let's leave
|
||||
rep movsb // the last 1, 2, or 3 bytes
|
||||
|
||||
|
|
Loading…
Reference in New Issue