x86/microVU: Use AVX2 for full block comparisons

This commit is contained in:
Connor McLaughlin 2022-10-12 17:25:35 +10:00 committed by refractionpcsx2
parent bf95193d5b
commit 197d4d1c81
4 changed files with 41 additions and 1 deletion

View File

@ -36,6 +36,14 @@
#error PCSX2 requires compiling for at least SSE 4.1
#endif
// Require 32 bit alignment for vectors for AVX2.
#if _M_SSE >= 0x501
#define SSE_ALIGN_N 32
#else
#define SSE_ALIGN_N 16
#endif
#define SSE_ALIGN alignas(SSE_ALIGN_N)
// Starting with AVX, processors have fast unaligned loads
// Reduce code duplication by not compiling multiple versions
#if _M_SSE >= 0x500

View File

@ -92,7 +92,7 @@ public:
microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), SSE_ALIGN_N);
newBlock->block.jumpCache = NULL;
newBlock->next = NULL;

View File

@ -198,7 +198,11 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
//------------------------------------------------------------------
alignas(__pagesize) extern u8 mVUsearchXMM[__pagesize];
typedef u32 (*mVUCall)(void*, void*);
#if _M_SSE >= 0x501
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xff)
#else
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
#endif
#define mVUemitSearch() \
{ \
mVUcustomSearch(); \

View File

@ -537,6 +537,7 @@ void mVUcustomSearch()
memset(mVUsearchXMM, 0xcc, __pagesize);
xSetPtr(mVUsearchXMM);
#if _M_SSE < 0x501
xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[arg2reg]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
@ -576,7 +577,34 @@ void mVUcustomSearch()
xPAND (xmm0, xmm4);
xMOVMSKPS(eax, xmm0);
#else
// We have to use unaligned loads here, because the blocks are only 16 byte aligned.
xVMOVUPS(ymm0, ptr[arg1reg]);
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg]);
xVMOVMSKPS(eax, ymm0);
xCMP(eax, 0xff);
xForwardJB8 exitPoint;
xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
xVMOVUPS(ymm2, ptr[arg1reg + 0x60]);
xVPCMP.EQD(ymm2, ymm2, ptr[arg2reg + 0x60]);
xVPAND(ymm0, ymm0, ymm1);
xVMOVUPS(ymm3, ptr[arg1reg + 0x80]);
xVPCMP.EQD(ymm3, ymm3, ptr[arg2reg + 0x80]);
xVPAND(ymm0, ymm0, ymm2);
xVPAND(ymm0, ymm0, ymm3);
xVMOVMSKPS(eax, ymm0);
xVZEROUPPER();
#endif
exitPoint.SetTarget();
xRET();
HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ExecOnly());
}