zzogl-pg: Use the memcpy_fast routines from Utilities on Linux. (Right now, Windows is using an old copy of them, and Linux wasn't using them at all.)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2774 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-03-26 09:19:41 +00:00
parent 0a6b6472c7
commit 02fdc8f9fc
6 changed files with 197 additions and 187 deletions

View File

@ -62,9 +62,9 @@ void GLWindow::CloseWindow()
{
if ( glDisplay != NULL )
{
XCloseDisplay(glDisplay);
glDisplay = NULL;
}
XCloseDisplay(glDisplay);
glDisplay = NULL;
}
}
bool GLWindow::DisplayWindow(int _width, int _height)

View File

@ -872,8 +872,13 @@ char *SysLibError(); // Gets previous error loading symbols
void SysCloseLibrary(void *lib); // Closes Library
void SysMessage(const char *fmt, ...);
#ifdef __LINUX__
#include "Utilities/MemcpyFast.h"
#define memcpy_amd memcpy_fast
#else
extern "C" void * memcpy_amd(void *dest, const void *src, size_t n);
extern "C" u8 memcmp_mmx(const void *dest, const void *src, int n);
#endif
template <typename T>
class CInterfacePtr

View File

@ -16,6 +16,9 @@
<Compiler>
<Add option="-g" />
</Compiler>
<Linker>
<Add library="../../../../../deps/debug/libUtilities.a" />
</Linker>
</Target>
<Target title="Release">
<Option output="../../../../../bin/plugins/ZZOgl-PG" prefix_auto="1" extension_auto="1" />
@ -29,6 +32,7 @@
</Compiler>
<Linker>
<Add option="-s" />
<Add library="../../../../../deps/release/libUtilities.a" />
</Linker>
</Target>
</Build>
@ -44,6 +48,7 @@
<Add directory="../../../opengl" />
<Add directory="../../../../../common/include" />
<Add directory="../../../../../3rdparty" />
<Add directory="../../../../../common/include/Utilities" />
</Compiler>
<Linker>
<Add option="`pkg-config gtk+-2.0 --libs`" />
@ -119,7 +124,6 @@
<Option compilerVar="CC" />
</Unit>
<Unit filename="../../glprocs.h" />
<Unit filename="../../memcpy_amd.cpp" />
<Unit filename="../../rasterfont.cpp" />
<Unit filename="../../rasterfont.h" />
<Unit filename="../../targets.cpp" />

View File

@ -18,16 +18,16 @@ EXEEXT=$(preext)@so_ext@
traplib_PROGRAMS=libzzoglpg
libzzoglpg_SOURCES=
libzzoglpg_DEPENDENCIES = libzzoglpg.a
libzzoglpg_DEPENDENCIES = libzzoglpg.a ../../../deps/release/libUtilities.a
libzzoglpg_LDFLAGS= @SHARED_LDFLAGS@
libzzoglpg_LDFLAGS+=-Wl,-soname,@ZEROGS_SONAME@
libzzoglpg_LDADD=$(libzzoglpg_a_OBJECTS)
libzzoglpg_a_SOURCES = \
GSmain.cpp GifTransfer.cpp memcpy_amd.cpp Regs.cpp x86.cpp zpipe.cpp Mem.cpp \
GSmain.cpp GLWinX11.cpp GifTransfer.cpp memcpy_amd.cpp Regs.cpp x86.cpp zpipe.cpp Mem.cpp \
rasterfont.cpp targets.cpp zerogs.cpp ZZoglVB.cpp ZZoglShoots.cpp ZZoglCreate.cpp \
ZZoglShaders.cpp ZZoglCRTC.cpp ZZoglSave.cpp ZZoglFlush.cpp \
Mem_Swizzle.h Mem_Tables.cpp Mem_Transmit.h
Mem_Swizzle.h Mem_Tables.cpp Mem_Transmit.h Mem_Swizzle.cpp
libzzoglpg_a_SOURCES += x86-32.S
@ -36,7 +36,7 @@ if SSE2
CCASFLAGS+= -DZEROGS_SSE2
endif
libzzoglpg_a_DEPENDENCIES = Linux/libZeroGSLinux.a
libzzoglpg_LDADD += Linux/libZeroGSLinux.a
libzzoglpg_a_DEPENDENCIES = Linux/libZeroGSLinux.a ../../../deps/release/libUtilities.a
libzzoglpg_LDADD += Linux/libZeroGSLinux.a ../../../deps/release/libUtilities.a
SUBDIRS = Linux ZeroGSShaders .

View File

@ -75,11 +75,10 @@ MEMCPY_AMD.CPP
#include <windows.h>
#endif
#if defined(_MSC_VER) && !defined(__x86_64__)
extern "C" {
#include "PS2Etypes.h"
#if defined(_MSC_VER) && !defined(__x86_64__)
void * memcpy_amd(void *dest, const void *src, size_t n)
{
__asm {
@ -460,19 +459,21 @@ End:
}
}
}
#else // _MSC_VER
// assume gcc or mingw or win x64
#include <memory.h>
#include <string.h>
void * memcpy_amd(void *dest, const void *src, size_t n)
{
memcpy(dest, src, n);
return dest;
}
#include "PS2Etypes.h"
#include "Utilities/MemcpyFast.h"
//void * memcpy_amd(void *dest, const void *src, size_t n)
//{
// //memcpy(dest, src, n);
// memcpy_fast(dest, src, n);
// return dest;
//}
#define memcpy_amd memcpy_fast
#endif
}

View File

@ -23,174 +23,174 @@
## ~10 times faster than standard memcmp
## (zerofrog)
#u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize)
.globl memcmp_mmx
.type memcmp_mmx, @function
memcmp_mmx:
push %esi
mov %ecx, dword ptr [%esp+16]
mov %edx, dword ptr [%esp+8]
mov %esi, dword ptr [%esp+12]
cmp %ecx, 32
jl Done4
// custom test first 8 to make sure things are ok
movq %mm0, [%esi]
movq %mm1, [%esi+8]
pcmpeqd %mm0, [%edx]
pcmpeqd %mm1, [%edx+8]
pand %mm0, %mm1
movq %mm2, [%esi+16]
pmovmskb %eax, %mm0
movq %mm3, [%esi+24]
// check if eq
cmp %eax, 0xff
je NextComp
mov %eax, 1
jmp End
NextComp:
pcmpeqd %mm2, [%edx+16]
pcmpeqd %mm3, [%edx+24]
pand %mm2, %mm3
pmovmskb %eax, %mm2
sub %ecx, 32
add %esi, 32
add %edx, 32
// check if eq
cmp %eax, 0xff
je ContinueTest
mov %eax, 1
jmp End
cmp %ecx, 64
jl Done8
Cmp8:
movq %mm0, [%esi]
movq %mm1, [%esi+8]
movq %mm2, [%esi+16]
movq %mm3, [%esi+24]
movq %mm4, [%esi+32]
movq %mm5, [%esi+40]
movq %mm6, [%esi+48]
movq %mm7, [%esi+56]
pcmpeqd %mm0, [%edx]
pcmpeqd %mm1, [%edx+8]
pcmpeqd %mm2, [%edx+16]
pcmpeqd %mm3, [%edx+24]
pand %mm0, %mm1
pcmpeqd %mm4, [%edx+32]
pand %mm0, %mm2
pcmpeqd %mm5, [%edx+40]
pand %mm0, %mm3
pcmpeqd %mm6, [%edx+48]
pand %mm0, %mm4
pcmpeqd %mm7, [%edx+56]
pand %mm0, %mm5
pand %mm0, %mm6
pand %mm0, %mm7
pmovmskb %eax, %mm0
// check if eq
cmp %eax, 0xff
je Continue
mov %eax, 1
jmp End
Continue:
sub %ecx, 64
add %esi, 64
add %edx, 64
ContinueTest:
cmp %ecx, 64
jge Cmp8
Done8:
test %ecx, 0x20
jz Done4
movq %mm0, [%esi]
movq %mm1, [%esi+8]
movq %mm2, [%esi+16]
movq %mm3, [%esi+24]
pcmpeqd %mm0, [%edx]
pcmpeqd %mm1, [%edx+8]
pcmpeqd %mm2, [%edx+16]
pcmpeqd %mm3, [%edx+24]
pand %mm0, %mm1
pand %mm0, %mm2
pand %mm0, %mm3
pmovmskb %eax, %mm0
sub %ecx, 32
add %esi, 32
add %edx, 32
// check if eq
cmp %eax, 0xff
je Done4
mov %eax, 1
jmp End
Done4:
cmp %ecx, 24
jne Done2
movq %mm0, [%esi]
movq %mm1, [%esi+8]
movq %mm2, [%esi+16]
pcmpeqd %mm0, [%edx]
pcmpeqd %mm1, [%edx+8]
pcmpeqd %mm2, [%edx+16]
pand %mm0, %mm1
pand %mm0, %mm2
pmovmskb %eax, %mm0
// check if eq
cmp %eax, 0xff
setne %al
jmp End
Done2:
cmp %ecx, 16
jne Done1
movq %mm0, [%esi]
movq %mm1, [%esi+8]
pcmpeqd %mm0, [%edx]
pcmpeqd %mm1, [%edx+8]
pand %mm0, %mm1
pmovmskb %eax, %mm0
// check if eq
cmp %eax, 0xff
setne %al
jmp End
Done1:
cmp %ecx, 8
jne Done
mov %eax, [%esi]
mov %esi, [%esi+4]
cmp %eax, [%edx]
je Next
mov %eax, 1
jmp End
Next:
cmp %esi, [%edx+4]
setne %al
jmp End
Done:
xor %eax, %eax
End:
pop %esi
emms
ret
#.globl memcmp_mmx
# .type memcmp_mmx, @function
#memcmp_mmx:
# push %esi
# mov %ecx, dword ptr [%esp+16]
# mov %edx, dword ptr [%esp+8]
# mov %esi, dword ptr [%esp+12]
#
# cmp %ecx, 32
# jl Done4
#
# // custom test first 8 to make sure things are ok
# movq %mm0, [%esi]
# movq %mm1, [%esi+8]
# pcmpeqd %mm0, [%edx]
# pcmpeqd %mm1, [%edx+8]
# pand %mm0, %mm1
# movq %mm2, [%esi+16]
# pmovmskb %eax, %mm0
# movq %mm3, [%esi+24]
#
# // check if eq
# cmp %eax, 0xff
# je NextComp
# mov %eax, 1
# jmp End
#
#NextComp:
# pcmpeqd %mm2, [%edx+16]
# pcmpeqd %mm3, [%edx+24]
# pand %mm2, %mm3
# pmovmskb %eax, %mm2
#
# sub %ecx, 32
# add %esi, 32
# add %edx, 32
#
# // check if eq
# cmp %eax, 0xff
# je ContinueTest
# mov %eax, 1
# jmp End
#
# cmp %ecx, 64
# jl Done8
#
#Cmp8:
# movq %mm0, [%esi]
# movq %mm1, [%esi+8]
# movq %mm2, [%esi+16]
# movq %mm3, [%esi+24]
# movq %mm4, [%esi+32]
# movq %mm5, [%esi+40]
# movq %mm6, [%esi+48]
# movq %mm7, [%esi+56]
# pcmpeqd %mm0, [%edx]
# pcmpeqd %mm1, [%edx+8]
# pcmpeqd %mm2, [%edx+16]
# pcmpeqd %mm3, [%edx+24]
# pand %mm0, %mm1
# pcmpeqd %mm4, [%edx+32]
# pand %mm0, %mm2
# pcmpeqd %mm5, [%edx+40]
# pand %mm0, %mm3
# pcmpeqd %mm6, [%edx+48]
# pand %mm0, %mm4
# pcmpeqd %mm7, [%edx+56]
# pand %mm0, %mm5
# pand %mm0, %mm6
# pand %mm0, %mm7
# pmovmskb %eax, %mm0
#
# // check if eq
# cmp %eax, 0xff
# je Continue
# mov %eax, 1
# jmp End
#
#Continue:
# sub %ecx, 64
# add %esi, 64
# add %edx, 64
#ContinueTest:
# cmp %ecx, 64
# jge Cmp8
#
#Done8:
# test %ecx, 0x20
# jz Done4
# movq %mm0, [%esi]
# movq %mm1, [%esi+8]
# movq %mm2, [%esi+16]
# movq %mm3, [%esi+24]
# pcmpeqd %mm0, [%edx]
# pcmpeqd %mm1, [%edx+8]
# pcmpeqd %mm2, [%edx+16]
# pcmpeqd %mm3, [%edx+24]
# pand %mm0, %mm1
# pand %mm0, %mm2
# pand %mm0, %mm3
# pmovmskb %eax, %mm0
# sub %ecx, 32
# add %esi, 32
# add %edx, 32
#
# // check if eq
# cmp %eax, 0xff
# je Done4
# mov %eax, 1
# jmp End
#
#Done4:
# cmp %ecx, 24
# jne Done2
# movq %mm0, [%esi]
# movq %mm1, [%esi+8]
# movq %mm2, [%esi+16]
# pcmpeqd %mm0, [%edx]
# pcmpeqd %mm1, [%edx+8]
# pcmpeqd %mm2, [%edx+16]
# pand %mm0, %mm1
# pand %mm0, %mm2
# pmovmskb %eax, %mm0
#
# // check if eq
# cmp %eax, 0xff
# setne %al
# jmp End
#
#Done2:
# cmp %ecx, 16
# jne Done1
#
# movq %mm0, [%esi]
# movq %mm1, [%esi+8]
# pcmpeqd %mm0, [%edx]
# pcmpeqd %mm1, [%edx+8]
# pand %mm0, %mm1
# pmovmskb %eax, %mm0
#
# // check if eq
# cmp %eax, 0xff
# setne %al
# jmp End
#
#Done1:
# cmp %ecx, 8
# jne Done
#
# mov %eax, [%esi]
# mov %esi, [%esi+4]
# cmp %eax, [%edx]
# je Next
# mov %eax, 1
# jmp End
#
#Next:
# cmp %esi, [%edx+4]
# setne %al
# jmp End
#
#Done:
# xor %eax, %eax
#
#End:
# pop %esi
# emms
# ret
#ifdef ZEROGS_SSE2