mirror of https://github.com/PCSX2/pcsx2.git
Finish the Linux implementation of memcpy_fast_. It is disabled by default until I'm sure it works correctly, but it can easily be enabled in build.sh. It should give Linux the same speed boost that Windows already has, but I haven't tested it enough yet to confirm that.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@643 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ad0705de56
commit
2270ba4eee
4
build.sh
4
build.sh
|
@ -11,6 +11,10 @@
|
||||||
#Optimized, but a devbuild
|
#Optimized, but a devbuild
|
||||||
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
|
export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd`"
|
||||||
|
|
||||||
|
|
||||||
|
#Optimized, but a devbuild - with memcpy_fast_ enabled.
|
||||||
|
#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --enable-memcpyfast --prefix `pwd`"
|
||||||
|
|
||||||
#Debug / Devbuild version
|
#Debug / Devbuild version
|
||||||
#export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"
|
#export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`"
|
||||||
|
|
||||||
|
|
|
@ -50,14 +50,15 @@ void _memset16_unaligned( void* dest, u16 data, size_t size );
|
||||||
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize);
|
||||||
|
|
||||||
# include "Linux/memzero.h"
|
# include "Linux/memzero.h"
|
||||||
|
#if defined(LINUX_USE_FAST_MEMORY)
|
||||||
|
# define memcpy_fast memcpy_amd_
|
||||||
|
# define memcpy_aligned memcpy_amd_
|
||||||
|
extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
||||||
|
#else
|
||||||
# define memcpy_fast memcpy
|
# define memcpy_fast memcpy
|
||||||
# define memcpy_aligned memcpy
|
# define memcpy_aligned memcpy
|
||||||
|
#endif // LINUX_USE_FAST_MEMORY
|
||||||
|
|
||||||
// Currently broken.
|
#endif // WIN32
|
||||||
//# define memcpy_fast memcpy_amd_
|
|
||||||
//# define memcpy_aligned memcpy_amd_
|
|
||||||
// extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes);
|
|
||||||
|
|
||||||
#endif
|
#endif //Header
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -63,6 +63,15 @@ AC_MSG_RESULT($debug)
|
||||||
|
|
||||||
AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
|
AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC))
|
||||||
|
|
||||||
|
dnl Check whether the experimental memcpy_fast_ routines were requested.
dnl The option defaults to "no" and only --enable-memcpyfast turns it on.
AC_MSG_CHECKING(turn on memcpy_fast_)
AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]),
 memcpyfast=$enableval,memcpyfast=no)
dnl Compare with the POSIX "=" operator. The "==" form is a bash extension and
dnl makes the test fail when configure is run by a strict /bin/sh such as dash,
dnl which would silently leave LINUX_USE_FAST_MEMORY undefined.
if test "x$memcpyfast" = xyes
then
 AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY])
fi
AC_MSG_RESULT($memcpyfast)
|
||||||
|
|
||||||
dnl Check for dev build
|
dnl Check for dev build
|
||||||
AC_MSG_CHECKING(for development build)
|
AC_MSG_CHECKING(for development build)
|
||||||
AC_ARG_ENABLE(devbuild, AC_HELP_STRING([--enable-devbuild], [Special Build for developers that simplifies testing and adds extra checks]),
|
AC_ARG_ENABLE(devbuild, AC_HELP_STRING([--enable-devbuild], [Special Build for developers that simplifies testing and adds extra checks]),
|
||||||
|
@ -138,3 +147,4 @@ echo " Force sse3? $sse3"
|
||||||
echo " nls support? $nls"
|
echo " nls support? $nls"
|
||||||
echo " local plugin inis? $localinis"
|
echo " local plugin inis? $localinis"
|
||||||
echo " custom cflags? $customcflags"
|
echo " custom cflags? $customcflags"
|
||||||
|
echo " memcpy_fast? $memcpyfast"
|
||||||
|
|
|
@ -359,7 +359,7 @@ memcpy_amd_:
|
||||||
$memcpy_do_align:
|
$memcpy_do_align:
|
||||||
mov %eax, 8 // a trick that's faster than rep movsb...
|
mov %eax, 8 // a trick that's faster than rep movsb...
|
||||||
sub %eax, %edi // align destination to qword
|
sub %eax, %edi // align destination to qword
|
||||||
and %eax, 0x111b // get the low bits
|
andb %eax, 111 // get the low bits
|
||||||
sub %ecx, %eax // update copy count
|
sub %ecx, %eax // update copy count
|
||||||
neg %eax // set up to jump into the array
|
neg %eax // set up to jump into the array
|
||||||
add %eax, offset $memcpy_align_done
|
add %eax, offset $memcpy_align_done
|
||||||
|
@ -427,7 +427,7 @@ $memcpy_ic_2:
|
||||||
mov %eax, %ecx // has valid low 6 bits of the byte count
|
mov %eax, %ecx // has valid low 6 bits of the byte count
|
||||||
$memcpy_ic_3:
|
$memcpy_ic_3:
|
||||||
shr %eax, 2 // dword count
|
shr %eax, 2 // dword count
|
||||||
and %eax, 0x1111b // only look at the "remainder" bits
|
andb %eax, 1111 // only look at the "remainder" bits
|
||||||
neg %eax // set up to jump into the array
|
neg %eax // set up to jump into the array
|
||||||
add %eax, offset $memcpy_last_few
|
add %eax, offset $memcpy_last_few
|
||||||
jmp %eax // jump to array of movsd's
|
jmp %eax // jump to array of movsd's
|
||||||
|
@ -512,7 +512,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
|
||||||
|
|
||||||
$memcpy_last_few: // dword aligned from before movsd's
|
$memcpy_last_few: // dword aligned from before movsd's
|
||||||
mov %eax, %ecx // has valid low 2 bits of the byte count
|
mov %eax, %ecx // has valid low 2 bits of the byte count
|
||||||
and %eax, 0x11b // the last few cows must come home
|
andb %eax, 11 // the last few cows must come home
|
||||||
jz $memcpy_final // no more, let's leave
|
jz $memcpy_final // no more, let's leave
|
||||||
rep movsb // the last 1, 2, or 3 bytes
|
rep movsb // the last 1, 2, or 3 bytes
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue