mirror of https://github.com/PCSX2/pcsx2.git
x86/microVU: Use AVX2 for full block comparisons
This commit is contained in:
parent
bf95193d5b
commit
197d4d1c81
|
@ -36,6 +36,14 @@
|
|||
#error PCSX2 requires compiling for at least SSE 4.1
|
||||
#endif
|
||||
|
||||
// Require 32-byte alignment for vectors for AVX2.
|
||||
#if _M_SSE >= 0x501
|
||||
#define SSE_ALIGN_N 32
|
||||
#else
|
||||
#define SSE_ALIGN_N 16
|
||||
#endif
|
||||
#define SSE_ALIGN alignas(SSE_ALIGN_N)
|
||||
|
||||
// Starting with AVX, processors have fast unaligned loads
|
||||
// Reduce code duplication by not compiling multiple versions
|
||||
#if _M_SSE >= 0x500
|
||||
|
|
|
@ -92,7 +92,7 @@ public:
|
|||
|
||||
microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
|
||||
microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
|
||||
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
|
||||
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), SSE_ALIGN_N);
|
||||
newBlock->block.jumpCache = NULL;
|
||||
newBlock->next = NULL;
|
||||
|
||||
|
|
|
@ -198,7 +198,11 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
|
|||
//------------------------------------------------------------------
|
||||
alignas(__pagesize) extern u8 mVUsearchXMM[__pagesize];
|
||||
typedef u32 (*mVUCall)(void*, void*);
|
||||
#if _M_SSE >= 0x501
|
||||
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xff)
|
||||
#else
|
||||
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
|
||||
#endif
|
||||
#define mVUemitSearch() \
|
||||
{ \
|
||||
mVUcustomSearch(); \
|
||||
|
|
|
@ -537,6 +537,7 @@ void mVUcustomSearch()
|
|||
memset(mVUsearchXMM, 0xcc, __pagesize);
|
||||
xSetPtr(mVUsearchXMM);
|
||||
|
||||
#if _M_SSE < 0x501
|
||||
xMOVAPS (xmm0, ptr32[arg1reg]);
|
||||
xPCMP.EQD(xmm0, ptr32[arg2reg]);
|
||||
xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
|
||||
|
@ -576,7 +577,34 @@ void mVUcustomSearch()
|
|||
xPAND (xmm0, xmm4);
|
||||
xMOVMSKPS(eax, xmm0);
|
||||
|
||||
#else
|
||||
// We have to use unaligned loads here, because the blocks are only 16 byte aligned.
|
||||
xVMOVUPS(ymm0, ptr[arg1reg]);
|
||||
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg]);
|
||||
xVMOVMSKPS(eax, ymm0);
|
||||
xCMP(eax, 0xff);
|
||||
xForwardJB8 exitPoint;
|
||||
|
||||
xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
|
||||
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
|
||||
|
||||
xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
|
||||
xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
|
||||
|
||||
xVMOVUPS(ymm2, ptr[arg1reg + 0x60]);
|
||||
xVPCMP.EQD(ymm2, ymm2, ptr[arg2reg + 0x60]);
|
||||
xVPAND(ymm0, ymm0, ymm1);
|
||||
|
||||
xVMOVUPS(ymm2, ptr[arg1reg + 0x80]);
|
||||
xVPCMP.EQD(ymm2, ymm2, ptr[arg2reg + 0x80]);
|
||||
xVPAND(ymm0, ymm0, ymm2);
|
||||
|
||||
xVMOVMSKPS(eax, ymm0);
|
||||
xVZEROUPPER();
|
||||
#endif
|
||||
|
||||
exitPoint.SetTarget();
|
||||
|
||||
xRET();
|
||||
HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ExecOnly());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue