diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp
index 8685e1e147..a2f65ed38f 100644
--- a/pcsx2/x86/microVU.cpp
+++ b/pcsx2/x86/microVU.cpp
@@ -49,7 +49,8 @@ microVUf(void) mVUinit(VURegs* vuRegsPtr) {
 
 	mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
 	if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
-
+
+	mVUemitSearch();
 	mVUreset();
 }
 
diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h
index 2fb0623027..08a794100f 100644
--- a/pcsx2/x86/microVU_IR.h
+++ b/pcsx2/x86/microVU_IR.h
@@ -32,6 +32,7 @@ union regInfo {
 #pragma pack(1)
 #pragma warning(disable:4996)
 #endif
+
 __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
 	u32 needExactMatch; // If set, block needs an exact match of pipeline state
 	u8 q;
@@ -44,6 +45,16 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
 	u8 padding[7]; // 160 bytes
 #if defined(_MSC_VER)
 };
+#else
+} __attribute__((packed));
+#endif
+
+__declspec(align(16)) struct microBlock {
+	microRegInfo pState;	// Detailed State of Pipeline
+	microRegInfo pStateEnd;	// Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
+	u8* x86ptrStart;	// Start of code
+#if defined(_MSC_VER)
+};
 #pragma pack()
 #else
 } __attribute__((packed));
@@ -60,12 +71,6 @@ struct microTempRegInfo {
 	u8 xgkick;	// Holds the cycle info for XGkick
 };
 
-__declspec(align(16)) struct microBlock {
-	microRegInfo pState;	// Detailed State of Pipeline
-	microRegInfo pStateEnd;	// Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
-	u8* x86ptrStart;	// Start of code
-};
-
 struct microVFreg {
 	u8 reg;	// Reg Index
 	u8 x;	// X vector read/written to?
@@ -132,8 +137,8 @@ struct microOp {
 
 template<u32 pSize>
 struct microIR {
-	microBlock*      pBlock;	// Pointer to a block in mVUblocks
 	microBlock       block;		// Block/Pipeline info
+	microBlock*      pBlock;	// Pointer to a block in mVUblocks
 	microTempRegInfo regsTemp;	// Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
 	microOp          info[pSize/2];	// Info for Instructions in current block
 	u8 branch;
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index 56bc34e72c..682c5407c6 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -161,16 +161,13 @@ declareAllVariables
 #define pass4 if (recPass == 3)
 
 // Define mVUquickSearch
-#if defined(_MSC_VER)
-extern u32 __fastcall mVUsearchXMM(void *dest, void *src);
-#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf)
-#else
-#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
-// Note: GCC might not guarantee alignment on microRegInfo,
-// so to be safe I'm using normal memcmp. If at least 8-byte
-// alignment is guaranteed, the function below is faster.
-// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
-#endif
+extern u8 mVUsearchXMM[0x1000];
+typedef u32 (__fastcall *mVUCall)(void*, void*);
+#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
+// Note: If GCC builds crash with above function, it means
+// that they're not guaranteeing 16-byte alignment on the structs
+// being compared. So use this function instead:
+// #define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
 
 // Misc Macros...
 #define mVUprogI mVU->prog.prog[progIndex]
diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl
index e2379dac43..ca60c20b33 100644
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@@ -438,100 +438,55 @@ void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
 // Micro VU - Custom Quick Search
 //------------------------------------------------------------------
 
-#ifndef __LINUX__
+PCSX2_ALIGNED(0x1000, static u8 mVUsearchXMM[0x1000]);
+
 // Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
-static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) {
+void mVUemitSearch() {
+	using namespace x86Emitter;
+	HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
+	memset_8<0xcc,0x1000>(mVUsearchXMM);
+	xSetPtr(mVUsearchXMM);
 
-	__asm {
-		movaps xmm0, [ecx]
-		pcmpeqd xmm0, [edx]
-		movaps xmm1, [ecx + 0x10]
-		pcmpeqd xmm1, [edx + 0x10]
-		pand xmm0, xmm1
+	xMOVAPS  (xmm0, ptr32[ecx]);
+	xPCMP.EQD(xmm0, ptr32[edx]);
+	xMOVAPS  (xmm1, ptr32[ecx + 0x10]);
+	xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
+	xPAND    (xmm0, xmm1);
 
-		movmskps eax, xmm0
-		cmp eax, 0xf
-		jl exitPoint
+	xMOVMSKPS(eax, xmm0);
+	xCMP     (eax, 0xf);
+	xForwardJL8 exitPoint;
 
-		movaps xmm0, [ecx + 0x20]
-		pcmpeqd xmm0, [edx + 0x20]
-		movaps xmm1, [ecx + 0x30]
-		pcmpeqd xmm1, [edx + 0x30]
-		pand xmm0, xmm1
+	xMOVAPS  (xmm0, ptr32[ecx + 0x20]);
+	xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
+	xMOVAPS  (xmm1, ptr32[ecx + 0x30]);
+	xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
+	xPAND    (xmm0, xmm1);
 
-		movaps xmm2, [ecx + 0x40]
-		pcmpeqd xmm2, [edx + 0x40]
-		movaps xmm3, [ecx + 0x50]
-		pcmpeqd xmm3, [edx + 0x50]
-		pand xmm2, xmm3
+	xMOVAPS  (xmm2, ptr32[ecx + 0x40]);
+	xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
+	xMOVAPS  (xmm3, ptr32[ecx + 0x50]);
+	xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
+	xPAND    (xmm2, xmm3);
 
-		movaps xmm4, [ecx + 0x60]
-		pcmpeqd xmm4, [edx + 0x60]
-		movaps xmm5, [ecx + 0x70]
-		pcmpeqd xmm5, [edx + 0x70]
-		pand xmm4, xmm5
+	xMOVAPS  (xmm4, ptr32[ecx + 0x60]);
+	xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
+	xMOVAPS  (xmm5, ptr32[ecx + 0x70]);
+	xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
+	xPAND    (xmm4, xmm5);
 
-		movaps xmm6, [ecx + 0x80]
-		pcmpeqd xmm6, [edx + 0x80]
-		movaps xmm7, [ecx + 0x90]
-		pcmpeqd xmm7, [edx + 0x90]
-		pand xmm6, xmm7
+	xMOVAPS  (xmm6, ptr32[ecx + 0x80]);
+	xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
+	xMOVAPS  (xmm7, ptr32[ecx + 0x90]);
+	xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
+	xPAND    (xmm6, xmm7);
 
-		pand xmm0, xmm2
-		pand xmm4, xmm6
-		pand xmm0, xmm4
-		movmskps eax, xmm0
-exitPoint:
-		ret
-	}
+	xPAND    (xmm0, xmm2);
+	xPAND    (xmm4, xmm6);
+	xPAND    (xmm0, xmm4);
+	xMOVMSKPS(eax, xmm0);
+
+	exitPoint.SetTarget();
+	xRET();
+	HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true );
 }
-#else
-// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
-static u32 __fastcall mVUsearchXMM(void *dest, void *src)
-{
-	__asm__
-	(
-		".intel_syntax noprefix\n"
-		"movaps xmm0, [ecx]\n"
-		"pcmpeqd xmm0, [edx]\n"
-		"movaps xmm1, [ecx + 0x10]\n"
-		"pcmpeqd xmm1, [edx + 0x10]\n"
-		"pand xmm0, xmm1\n"
-
-		"movmskps eax, xmm0\n"
-		"cmp eax, 0xf\n"
-		"jl exitPoint\n"
-
-		"movaps xmm0, [ecx + 0x20]\n"
-		"pcmpeqd xmm0, [edx + 0x20]\n"
-		"movaps xmm1, [ecx + 0x30]\n"
-		"pcmpeqd xmm1, [edx + 0x30]\n"
-		"pand xmm0, xmm1\n"
-
-		"movaps xmm2, [ecx + 0x40]\n"
-		"pcmpeqd xmm2, [edx + 0x40]\n"
-		"movaps xmm3, [ecx + 0x50]\n"
-		"pcmpeqd xmm3, [edx + 0x50]\n"
-		"pand xmm2, xmm3\n"
-
-		"movaps xmm4, [ecx + 0x60]\n"
-		"pcmpeqd xmm4, [edx + 0x60]\n"
-		"movaps xmm5, [ecx + 0x70]\n"
-		"pcmpeqd xmm5, [edx + 0x70]\n"
-		"pand xmm4, xmm5\n"
-
-		"movaps xmm6, [ecx + 0x80]\n"
-		"pcmpeqd xmm6, [edx + 0x80]\n"
-		"movaps xmm7, [ecx + 0x90]\n"
-		"pcmpeqd xmm7, [edx + 0x90]\n"
-		"pand xmm6, xmm7\n"
-
-		"pand xmm0, xmm2\n"
-		"pand xmm4, xmm6\n"
-		"pand xmm0, xmm4\n"
-		"movmskps eax, xmm0\n"
-	"exitPoint:\n"
-		".att_syntax\n"
-	);
-}
-#endif
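
For reference, below is a minimal, hypothetical C++ sketch (not part of the patch) of the comparison that the emitted mVUsearchXMM routine performs, written with SSE2 intrinsics. The name quickSearchRef and the use of uint32_t are illustrative assumptions only; in the patch the real routine is generated at runtime into the 0x1000-byte mVUsearchXMM buffer by mVUemitSearch() and is invoked through the mVUCall function-pointer cast inside the mVUquickSearch macro. Both inputs are assumed to be 16-byte aligned, 0xa0-byte (160-byte) pipeline-state structs, per the alignment note in the patch.

    #include <emmintrin.h> // SSE2 intrinsics
    #include <cstdint>

    // Illustrative reference only: compares two 16-byte-aligned 0xa0-byte blocks
    // in 128-bit chunks and returns 0xf when every chunk matched, which is the
    // value the mVUquickSearch macro tests for.
    static uint32_t quickSearchRef(const void* dest, const void* src)
    {
        const __m128i* a = (const __m128i*)dest;
        const __m128i* b = (const __m128i*)src;

        // Compare the first 0x20 bytes and bail out early on any mismatch
        // (microRegInfo is "Ordered for Faster Compares").
        __m128i m = _mm_and_si128(_mm_cmpeq_epi32(a[0], b[0]),
                                  _mm_cmpeq_epi32(a[1], b[1]));
        uint32_t mask = (uint32_t)_mm_movemask_ps(_mm_castsi128_ps(m));
        if (mask != 0xf) return mask; // mirrors the emitted 'jl exitPoint'

        // Compare the remaining 0x80 bytes, AND-ing the per-chunk equality masks.
        m = _mm_cmpeq_epi32(a[2], b[2]);
        for (int i = 3; i < 10; i++)
            m = _mm_and_si128(m, _mm_cmpeq_epi32(a[i], b[i]));
        return (uint32_t)_mm_movemask_ps(_mm_castsi128_ps(m));
    }

The generated version additionally relies on the __fastcall convention (dest in ecx, src in edx), which is why the code buffer is called through the mVUCall typedef rather than invoked directly, and the buffer is flipped back to read-only with HostSys::MemProtect once emission finishes.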