Core: Remove memcmp_mmx.

This commit is contained in:
arcum42@gmail.com 2022-03-20 03:22:03 -07:00 committed by refractionpcsx2
parent 268ecf42c3
commit 33e0ac729e
8 changed files with 4 additions and 205 deletions

View File

@ -53,7 +53,6 @@ target_sources(common PRIVATE
emitter/LnxCpuDetect.cpp emitter/LnxCpuDetect.cpp
emitter/WinCpuDetect.cpp emitter/WinCpuDetect.cpp
emitter/x86emitter.cpp emitter/x86emitter.cpp
x86/MemcpyFast.cpp
Darwin/DarwinThreads.cpp Darwin/DarwinThreads.cpp
Darwin/DarwinMisc.cpp Darwin/DarwinMisc.cpp
Darwin/DarwinSemaphore.cpp Darwin/DarwinSemaphore.cpp

View File

@ -19,14 +19,6 @@
#include "common/Pcsx2Types.h" #include "common/Pcsx2Types.h"
#include "common/Pcsx2Defs.h" #include "common/Pcsx2Defs.h"
// For 32-bit MSVC compiles, memcmp performs much worse than memcmp_mmx and
// other implementations. So for this combination only, prefer memcmp_mmx
#if defined(_MSC_VER) && !defined(_M_X86_64)
extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize);
#else
#define memcmp_mmx memcmp
#endif
// This method can clear any object-like entity -- which is anything that is not a pointer. // This method can clear any object-like entity -- which is anything that is not a pointer.
// Structures, static arrays, etc. No need to include sizeof() crap, this does it automatically // Structures, static arrays, etc. No need to include sizeof() crap, this does it automatically
// for you! // for you!

View File

@ -78,7 +78,6 @@
<ClCompile Include="Vulkan\Texture.cpp" /> <ClCompile Include="Vulkan\Texture.cpp" />
<ClCompile Include="Vulkan\Util.cpp" /> <ClCompile Include="Vulkan\Util.cpp" />
<ClCompile Include="WindowInfo.cpp" /> <ClCompile Include="WindowInfo.cpp" />
<ClCompile Include="x86\MemcpyFast.cpp" />
<ClCompile Include="PathUtils.cpp" /> <ClCompile Include="PathUtils.cpp" />
<ClCompile Include="Perf.cpp" /> <ClCompile Include="Perf.cpp" />
<ClCompile Include="PrecompiledHeader.cpp"> <ClCompile Include="PrecompiledHeader.cpp">

View File

@ -49,9 +49,6 @@
<ClCompile Include="Linux\LnxThreads.cpp"> <ClCompile Include="Linux\LnxThreads.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="x86\MemcpyFast.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="emitter\movs.cpp"> <ClCompile Include="emitter\movs.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>

View File

@ -1,188 +0,0 @@
// GH: AMD memcpy was removed. The remaining part (memcmp_mmx) is likely from Zerofrog.
// Hopefully memcmp_mmx will be dropped in the future.
#if defined(_WIN32) && !defined(_M_AMD64)
#include "common/MemcpyFast.h"
#include "common/Assertions.h"
#ifdef _MSC_VER
#pragma warning(disable : 4414)
#endif
// MMX memory-equality check, written in Visual C++ inline assembly syntax.
// Credited to zerofrog, who described it as ~10 times faster than the
// standard memcmp.
//
// src1, src2 - the two buffers to compare.
// cmpsize    - number of bytes to compare; asserted to be a multiple of 8.
//
// Returns 0 if the buffers are equal, a nonzero value if they are not equal.
// Unlike memcmp, the result carries no ordering information.
//
// There is no C return statement: the result is whatever the asm block has
// left in al/eax when control reaches the End label.
u8 memcmp_mmx(const void *src1, const void *src2, int cmpsize)
{
pxAssert((cmpsize & 7) == 0);
__asm {
// Register roles for the whole routine:
//   ecx = bytes still to compare, edx = cursor into src1, esi = cursor into src2
mov ecx, cmpsize
mov edx, src1
mov esi, src2
// Fewer than 32 bytes (signed compare): go straight to the small-tail handling.
cmp ecx, 32
jl Done4
// Compare the first 32 bytes as two 16-byte halves so that a mismatch in the
// first 16 bytes exits early. The loads of mm2/mm3 (src2 bytes 16..31) are
// interleaved ahead of the first result check.
// pcmpeqd turns each equal dword lane into all ones, pand merges the lanes,
// and pmovmskb gathers the top bit of every byte: 0xff means all 8 bytes set.
movq mm0, [esi]
movq mm1, [esi+8]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pand mm0, mm1
movq mm2, [esi+16]
pmovmskb eax, mm0
movq mm3, [esi+24]
// first 16 bytes equal? otherwise report a mismatch
cmp eax, 0xff
je NextComp
mov eax, 1
jmp End
NextComp:
// Second half of the first 32 bytes, then consume those 32 bytes.
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm2, mm3
pmovmskb eax, mm2
sub ecx, 32
add esi, 32
add edx, 32
// bytes 16..31 equal? then enter the 64-byte loop via its size check
cmp eax, 0xff
je ContinueTest
mov eax, 1
jmp End
// NOTE(review): the next two instructions follow an unconditional jmp and have
// no label, so they appear to be unreachable; ContinueTest below performs the
// same "at least 64 bytes left" check.
cmp ecx, 64
jl Done8
Cmp8:
// Main loop body: compare one 64-byte chunk using all eight MMX registers,
// folding every per-lane result into mm0.
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
movq mm4, [esi+32]
movq mm5, [esi+40]
movq mm6, [esi+48]
movq mm7, [esi+56]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm0, mm1
pcmpeqd mm4, [edx+32]
pand mm0, mm2
pcmpeqd mm5, [edx+40]
pand mm0, mm3
pcmpeqd mm6, [edx+48]
pand mm0, mm4
pcmpeqd mm7, [edx+56]
pand mm0, mm5
pand mm0, mm6
pand mm0, mm7
pmovmskb eax, mm0
// whole 64-byte chunk equal? otherwise report a mismatch
cmp eax, 0xff
je Continue
mov eax, 1
jmp End
Continue:
// Consume the 64 bytes just compared.
sub ecx, 64
add esi, 64
add edx, 64
ContinueTest:
// Keep looping while at least 64 bytes remain.
cmp ecx, 64
jge Cmp8
Done8:
// Fewer than 64 bytes remain. If bit 0x20 of the remaining count is set,
// compare one more 32-byte chunk; otherwise skip to the small-tail handling.
test ecx, 0x20
jz Done4
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
movq mm3, [esi+24]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pcmpeqd mm3, [edx+24]
pand mm0, mm1
pand mm0, mm2
pand mm0, mm3
pmovmskb eax, mm0
sub ecx, 32
add esi, 32
add edx, 32
// 32-byte chunk equal? then handle whatever is left; otherwise mismatch
cmp eax, 0xff
je Done4
mov eax, 1
jmp End
Done4:
// Tail case: exactly 24 bytes left.
cmp ecx, 24
jne Done2
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+16]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pcmpeqd mm2, [edx+16]
pand mm0, mm1
pand mm0, mm2
pmovmskb eax, mm0
// al = 1 if any byte differed, 0 if all 24 bytes matched
cmp eax, 0xff
setne al
jmp End
Done2:
// Tail case: exactly 16 bytes left.
cmp ecx, 16
jne Done1
movq mm0, [esi]
movq mm1, [esi+8]
pcmpeqd mm0, [edx]
pcmpeqd mm1, [edx+8]
pand mm0, mm1
pmovmskb eax, mm0
// al = 1 if any byte differed, 0 if all 16 bytes matched
cmp eax, 0xff
setne al
jmp End
Done1:
// Tail case: exactly 8 bytes left, compared as two 32-bit integers without MMX.
// esi is overwritten with the second src2 dword; it is not used as a pointer
// after this point.
cmp ecx, 8
jne Done
mov eax, [esi]
mov esi, [esi+4]
cmp eax, [edx]
je Next
mov eax, 1
jmp End
Next:
// First dword matched; the result now depends on the second dword only.
cmp esi, [edx+4]
setne al
jmp End
Done:
// Remaining count matched none of 24/16/8 (0 for a valid size): report equal.
// NOTE(review): a negative or non-multiple-of-8 cmpsize would also land here
// and report "equal" if pxAssert does not stop execution -- confirm callers.
xor eax, eax
End:
// Every exit path passes through here; emms empties the MMX state so that
// later x87 floating-point code is not affected.
emms
}
}
#endif

View File

@ -350,7 +350,7 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32* data, int size)
{ {
//The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it. //The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it.
//Faster without. //Faster without.
//if (memcmp_mmx(VUx.Micro + addr, data, size*4)) { //if (memcmp(VUx.Micro + addr, data, size*4)) {
// Clear VU memory before writing! // Clear VU memory before writing!
if (!idx) if (!idx)
CpuVU0->Clear(addr, size * 4); CpuVU0->Clear(addr, size * 4);

View File

@ -282,7 +282,7 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg)
{ {
if (cmpWholeProg) if (cmpWholeProg)
{ {
if (memcmp_mmx((u8*)prog.data, mVU.regs().Micro, mVU.microMemSize)) if (memcmp((u8*)prog.data, mVU.regs().Micro, mVU.microMemSize))
return false; return false;
} }
else else
@ -292,7 +292,7 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg)
auto cmpOffset = [&](void* x) { return (u8*)x + range.start; }; auto cmpOffset = [&](void* x) { return (u8*)x + range.start; };
if ((range.start < 0) || (range.end < 0)) if ((range.start < 0) || (range.end < 0))
DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end); DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end);
if (memcmp_mmx(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start))) if (memcmp(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start)))
return false; return false;
} }
} }

View File

@ -35,7 +35,7 @@ __fi void mVUcheckIsSame(mV)
{ {
if (mVU.prog.isSame == -1) if (mVU.prog.isSame == -1)
{ {
mVU.prog.isSame = !memcmp_mmx((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize); mVU.prog.isSame = !memcmp((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize);
} }
if (mVU.prog.isSame == 0) if (mVU.prog.isSame == 0)
{ {