- Rewrote the custom compare function to use the emitter instead of inline asm.
- Linux builds now use the emitted compare function as well (previously they fell back to memcmp).

Note: If this revision causes microVU to crash on Linux, GCC is not guaranteeing 16-byte alignment for the microRegInfo and microBlock structs. In that case, mVUquickSearch needs to be switched back to the plain memcmp version (see the commented-out define in microVU_Misc.h).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1345 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-06-07 12:04:39 +00:00
parent b61f7cc4d1
commit 2ca4a603b3
4 changed files with 64 additions and 106 deletions

View File

@ -50,6 +50,7 @@ microVUf(void) mVUinit(VURegs* vuRegsPtr) {
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache)); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
mVUemitSearch();
mVUreset<vuIndex>(); mVUreset<vuIndex>();
} }

View File

@ -32,6 +32,7 @@ union regInfo {
#pragma pack(1) #pragma pack(1)
#pragma warning(disable:4996) #pragma warning(disable:4996)
#endif #endif
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
u32 needExactMatch; // If set, block needs an exact match of pipeline state u32 needExactMatch; // If set, block needs an exact match of pipeline state
u8 q; u8 q;
@ -44,6 +45,16 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
u8 padding[7]; // 160 bytes u8 padding[7]; // 160 bytes
#if defined(_MSC_VER) #if defined(_MSC_VER)
}; };
#else
} __attribute__((packed));
#endif
__declspec(align(16)) struct microBlock {
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
#if defined(_MSC_VER)
};
#pragma pack() #pragma pack()
#else #else
} __attribute__((packed)); } __attribute__((packed));
@ -60,12 +71,6 @@ struct microTempRegInfo {
u8 xgkick; // Holds the cycle info for XGkick u8 xgkick; // Holds the cycle info for XGkick
}; };
__declspec(align(16)) struct microBlock {
microRegInfo pState; // Detailed State of Pipeline
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
u8* x86ptrStart; // Start of code
};
struct microVFreg { struct microVFreg {
u8 reg; // Reg Index u8 reg; // Reg Index
u8 x; // X vector read/written to? u8 x; // X vector read/written to?
@ -132,8 +137,8 @@ struct microOp {
template<u32 pSize> template<u32 pSize>
struct microIR { struct microIR {
microBlock* pBlock; // Pointer to a block in mVUblocks
microBlock block; // Block/Pipeline info microBlock block; // Block/Pipeline info
microBlock* pBlock; // Pointer to a block in mVUblocks
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
microOp info[pSize/2]; // Info for Instructions in current block microOp info[pSize/2]; // Info for Instructions in current block
u8 branch; u8 branch;

View File

@ -161,16 +161,13 @@ declareAllVariables
#define pass4 if (recPass == 3) #define pass4 if (recPass == 3)
// Define mVUquickSearch // Define mVUquickSearch
#if defined(_MSC_VER) extern u8 mVUsearchXMM[0x1000];
extern u32 __fastcall mVUsearchXMM(void *dest, void *src); typedef u32 (__fastcall *mVUCall)(void*, void*);
#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf) #define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
#else // Note: If GCC builds crash with above function, it means
#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size)) // that they're not guaranteeing 16-byte alignment on the structs
// Note: GCC might not guarantee alignment on microRegInfo, // being compared. So use this function instead:
// so to be safe I'm using normal memcmp. If at least 8-byte // #define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
// alignment is guaranteed, the function below is faster.
// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
#endif
// Misc Macros... // Misc Macros...
#define mVUprogI mVU->prog.prog[progIndex] #define mVUprogI mVU->prog.prog[progIndex]

View File

@ -438,100 +438,55 @@ void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
// Micro VU - Custom Quick Search // Micro VU - Custom Quick Search
//------------------------------------------------------------------ //------------------------------------------------------------------
#ifndef __LINUX__ PCSX2_ALIGNED(0x1000, static u8 mVUsearchXMM[0x1000]);
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!) // Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) { void mVUemitSearch() {
using namespace x86Emitter;
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
memset_8<0xcc,0x1000>(mVUsearchXMM);
xSetPtr(mVUsearchXMM);
__asm { xMOVAPS (xmm0, ptr32[ecx]);
movaps xmm0, [ecx] xPCMP.EQD(xmm0, ptr32[edx]);
pcmpeqd xmm0, [edx] xMOVAPS (xmm1, ptr32[ecx + 0x10]);
movaps xmm1, [ecx + 0x10] xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
pcmpeqd xmm1, [edx + 0x10] xPAND (xmm0, xmm1);
pand xmm0, xmm1
movmskps eax, xmm0 xMOVMSKPS(eax, xmm0);
cmp eax, 0xf xCMP (eax, 0xf);
jl exitPoint xForwardJL8 exitPoint;
movaps xmm0, [ecx + 0x20] xMOVAPS (xmm0, ptr32[ecx + 0x20]);
pcmpeqd xmm0, [edx + 0x20] xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
movaps xmm1, [ecx + 0x30] xMOVAPS (xmm1, ptr32[ecx + 0x30]);
pcmpeqd xmm1, [edx + 0x30] xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
pand xmm0, xmm1 xPAND (xmm0, xmm1);
movaps xmm2, [ecx + 0x40] xMOVAPS (xmm2, ptr32[ecx + 0x40]);
pcmpeqd xmm2, [edx + 0x40] xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
movaps xmm3, [ecx + 0x50] xMOVAPS (xmm3, ptr32[ecx + 0x50]);
pcmpeqd xmm3, [edx + 0x50] xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
pand xmm2, xmm3 xPAND (xmm2, xmm3);
movaps xmm4, [ecx + 0x60] xMOVAPS (xmm4, ptr32[ecx + 0x60]);
pcmpeqd xmm4, [edx + 0x60] xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
movaps xmm5, [ecx + 0x70] xMOVAPS (xmm5, ptr32[ecx + 0x70]);
pcmpeqd xmm5, [edx + 0x70] xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
pand xmm4, xmm5 xPAND (xmm4, xmm5);
movaps xmm6, [ecx + 0x80] xMOVAPS (xmm6, ptr32[ecx + 0x80]);
pcmpeqd xmm6, [edx + 0x80] xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
movaps xmm7, [ecx + 0x90] xMOVAPS (xmm7, ptr32[ecx + 0x90]);
pcmpeqd xmm7, [edx + 0x90] xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
pand xmm6, xmm7 xPAND (xmm6, xmm7);
pand xmm0, xmm2 xPAND (xmm0, xmm2);
pand xmm4, xmm6 xPAND (xmm4, xmm6);
pand xmm0, xmm4 xPAND (xmm0, xmm4);
movmskps eax, xmm0 xMOVMSKPS(eax, xmm0);
exitPoint:
ret exitPoint.SetTarget();
} xRET();
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true );
} }
#else
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
static u32 __fastcall mVUsearchXMM(void *dest, void *src)
{
__asm__
(
".intel_syntax noprefix\n"
"movaps xmm0, [ecx]\n"
"pcmpeqd xmm0, [edx]\n"
"movaps xmm1, [ecx + 0x10]\n"
"pcmpeqd xmm1, [edx + 0x10]\n"
"pand xmm0, xmm1\n"
"movmskps eax, xmm0\n"
"cmp eax, 0xf\n"
"jl exitPoint\n"
"movaps xmm0, [ecx + 0x20]\n"
"pcmpeqd xmm0, [edx + 0x20]\n"
"movaps xmm1, [ecx + 0x30]\n"
"pcmpeqd xmm1, [edx + 0x30]\n"
"pand xmm0, xmm1\n"
"movaps xmm2, [ecx + 0x40]\n"
"pcmpeqd xmm2, [edx + 0x40]\n"
"movaps xmm3, [ecx + 0x50]\n"
"pcmpeqd xmm3, [edx + 0x50]\n"
"pand xmm2, xmm3\n"
"movaps xmm4, [ecx + 0x60]\n"
"pcmpeqd xmm4, [edx + 0x60]\n"
"movaps xmm5, [ecx + 0x70]\n"
"pcmpeqd xmm5, [edx + 0x70]\n"
"pand xmm4, xmm5\n"
"movaps xmm6, [ecx + 0x80]\n"
"pcmpeqd xmm6, [edx + 0x80]\n"
"movaps xmm7, [ecx + 0x90]\n"
"pcmpeqd xmm7, [edx + 0x90]\n"
"pand xmm6, xmm7\n"
"pand xmm0, xmm2\n"
"pand xmm4, xmm6\n"
"pand xmm0, xmm4\n"
"movmskps eax, xmm0\n"
"exitPoint:\n"
".att_syntax\n"
);
}
#endif