From 33e0ac729e9b56c68871064303bb1525351623ff Mon Sep 17 00:00:00 2001 From: "arcum42@gmail.com" Date: Sun, 20 Mar 2022 03:22:03 -0700 Subject: [PATCH] Core: Remove memcmp_mmx. --- common/CMakeLists.txt | 1 - common/MemcpyFast.h | 8 -- common/common.vcxproj | 1 - common/common.vcxproj.filters | 3 - common/x86/MemcpyFast.cpp | 188 ---------------------------------- pcsx2/Vif_Codes.cpp | 2 +- pcsx2/x86/microVU.cpp | 4 +- pcsx2/x86/microVU_Compile.inl | 2 +- 8 files changed, 4 insertions(+), 205 deletions(-) delete mode 100644 common/x86/MemcpyFast.cpp diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 69a2728f38..608af15665 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -53,7 +53,6 @@ target_sources(common PRIVATE emitter/LnxCpuDetect.cpp emitter/WinCpuDetect.cpp emitter/x86emitter.cpp - x86/MemcpyFast.cpp Darwin/DarwinThreads.cpp Darwin/DarwinMisc.cpp Darwin/DarwinSemaphore.cpp diff --git a/common/MemcpyFast.h b/common/MemcpyFast.h index a42afc4b4e..29b733cd31 100644 --- a/common/MemcpyFast.h +++ b/common/MemcpyFast.h @@ -19,14 +19,6 @@ #include "common/Pcsx2Types.h" #include "common/Pcsx2Defs.h" -// For 32-bit MSVC compiles, memcmp performs much worse than memcmp_mmx and -// other implementations. So for this combination only, prefer memcmp_mmx -#if defined(_MSC_VER) && !defined(_M_X86_64) -extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize); -#else -#define memcmp_mmx memcmp -#endif - // This method can clear any object-like entity -- which is anything that is not a pointer. // Structures, static arrays, etc. No need to include sizeof() crap, this does it automatically // for you! diff --git a/common/common.vcxproj b/common/common.vcxproj index 2005697e64..ee2b5c2cfa 100644 --- a/common/common.vcxproj +++ b/common/common.vcxproj @@ -78,7 +78,6 @@ - diff --git a/common/common.vcxproj.filters b/common/common.vcxproj.filters index 764ed08887..ceddb7448a 100644 --- a/common/common.vcxproj.filters +++ b/common/common.vcxproj.filters @@ -49,9 +49,6 @@ Source Files - - Source Files - Source Files diff --git a/common/x86/MemcpyFast.cpp b/common/x86/MemcpyFast.cpp deleted file mode 100644 index 9a9b4d2de9..0000000000 --- a/common/x86/MemcpyFast.cpp +++ /dev/null @@ -1,188 +0,0 @@ -// GH: AMD memcpy was removed. The remaining part (memcmp_mmx) is likely from Zerofrog. -// Hopefully memcmp_mmx will be dropped in the future. - -#if defined(_WIN32) && !defined(_M_AMD64) -#include "common/MemcpyFast.h" -#include "common/Assertions.h" - -#ifdef _MSC_VER -#pragma warning(disable : 4414) -#endif - -// Inline assembly syntax for use with Visual C++ - -// mmx mem-compare implementation, size has to be a multiple of 8 -// returns 0 is equal, nonzero value if not equal -// ~10 times faster than standard memcmp -// (zerofrog) -u8 memcmp_mmx(const void *src1, const void *src2, int cmpsize) -{ - pxAssert((cmpsize & 7) == 0); - - __asm { - mov ecx, cmpsize - mov edx, src1 - mov esi, src2 - - cmp ecx, 32 - jl Done4 - - // custom test first 8 to make sure things are ok - movq mm0, [esi] - movq mm1, [esi+8] - pcmpeqd mm0, [edx] - pcmpeqd mm1, [edx+8] - pand mm0, mm1 - movq mm2, [esi+16] - pmovmskb eax, mm0 - movq mm3, [esi+24] - - // check if eq - cmp eax, 0xff - je NextComp - mov eax, 1 - jmp End - -NextComp: - pcmpeqd mm2, [edx+16] - pcmpeqd mm3, [edx+24] - pand mm2, mm3 - pmovmskb eax, mm2 - - sub ecx, 32 - add esi, 32 - add edx, 32 - - // check if eq - cmp eax, 0xff - je ContinueTest - mov eax, 1 - jmp End - - cmp ecx, 64 - jl Done8 - -Cmp8: - movq mm0, [esi] - movq mm1, [esi+8] - movq mm2, [esi+16] - movq mm3, [esi+24] - movq mm4, [esi+32] - movq mm5, [esi+40] - movq mm6, [esi+48] - movq mm7, [esi+56] - pcmpeqd mm0, [edx] - pcmpeqd mm1, [edx+8] - pcmpeqd mm2, [edx+16] - pcmpeqd mm3, [edx+24] - pand mm0, mm1 - pcmpeqd mm4, [edx+32] - pand mm0, mm2 - pcmpeqd mm5, [edx+40] - pand mm0, mm3 - pcmpeqd mm6, [edx+48] - pand mm0, mm4 - pcmpeqd mm7, [edx+56] - pand mm0, mm5 - pand mm0, mm6 - pand mm0, mm7 - pmovmskb eax, mm0 - - // check if eq - cmp eax, 0xff - je Continue - mov eax, 1 - jmp End - -Continue: - sub ecx, 64 - add esi, 64 - add edx, 64 -ContinueTest: - cmp ecx, 64 - jge Cmp8 - -Done8: - test ecx, 0x20 - jz Done4 - movq mm0, [esi] - movq mm1, [esi+8] - movq mm2, [esi+16] - movq mm3, [esi+24] - pcmpeqd mm0, [edx] - pcmpeqd mm1, [edx+8] - pcmpeqd mm2, [edx+16] - pcmpeqd mm3, [edx+24] - pand mm0, mm1 - pand mm0, mm2 - pand mm0, mm3 - pmovmskb eax, mm0 - sub ecx, 32 - add esi, 32 - add edx, 32 - - // check if eq - cmp eax, 0xff - je Done4 - mov eax, 1 - jmp End - -Done4: - cmp ecx, 24 - jne Done2 - movq mm0, [esi] - movq mm1, [esi+8] - movq mm2, [esi+16] - pcmpeqd mm0, [edx] - pcmpeqd mm1, [edx+8] - pcmpeqd mm2, [edx+16] - pand mm0, mm1 - pand mm0, mm2 - pmovmskb eax, mm0 - - // check if eq - cmp eax, 0xff - setne al - jmp End - -Done2: - cmp ecx, 16 - jne Done1 - - movq mm0, [esi] - movq mm1, [esi+8] - pcmpeqd mm0, [edx] - pcmpeqd mm1, [edx+8] - pand mm0, mm1 - pmovmskb eax, mm0 - - // check if eq - cmp eax, 0xff - setne al - jmp End - -Done1: - cmp ecx, 8 - jne Done - - mov eax, [esi] - mov esi, [esi+4] - cmp eax, [edx] - je Next - mov eax, 1 - jmp End - -Next: - cmp esi, [edx+4] - setne al - jmp End - -Done: - xor eax, eax - -End: - emms - } -} - -#endif diff --git a/pcsx2/Vif_Codes.cpp b/pcsx2/Vif_Codes.cpp index eac471e021..fbaca4f1b5 100644 --- a/pcsx2/Vif_Codes.cpp +++ b/pcsx2/Vif_Codes.cpp @@ -350,7 +350,7 @@ static __fi void _vifCode_MPG(int idx, u32 addr, const u32* data, int size) { //The compare is pretty much a waste of time, likelyhood is that the program isnt there, thats why its copying it. //Faster without. - //if (memcmp_mmx(VUx.Micro + addr, data, size*4)) { + //if (memcmp(VUx.Micro + addr, data, size*4)) { // Clear VU memory before writing! if (!idx) CpuVU0->Clear(addr, size * 4); diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index ba419f0a79..60d96fcfae 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -282,7 +282,7 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg) { if (cmpWholeProg) { - if (memcmp_mmx((u8*)prog.data, mVU.regs().Micro, mVU.microMemSize)) + if (memcmp((u8*)prog.data, mVU.regs().Micro, mVU.microMemSize)) return false; } else @@ -292,7 +292,7 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg) auto cmpOffset = [&](void* x) { return (u8*)x + range.start; }; if ((range.start < 0) || (range.end < 0)) DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end); - if (memcmp_mmx(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start))) + if (memcmp(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start))) return false; } } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 3a430303b0..d86320b685 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -35,7 +35,7 @@ __fi void mVUcheckIsSame(mV) { if (mVU.prog.isSame == -1) { - mVU.prog.isSame = !memcmp_mmx((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize); + mVU.prog.isSame = !memcmp((u8*)mVUcurProg.data, mVU.regs().Micro, mVU.microMemSize); } if (mVU.prog.isSame == 0) {