- Rewrote the custom compare function to use the x86 emitter instead of inline asm.
- Set Linux builds to use the generated function as well (previously they fell back to plain memcmp).

Note: If this revision causes microVU to crash on Linux, it means GCC isn't guaranteeing 16-byte alignment on the microRegInfo and microBlock structs. In that case mVUquickSearch will need to fall back to plain memcmp instead (see microVU_Misc.h).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1345 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-06-07 12:04:39 +00:00
parent b61f7cc4d1
commit 2ca4a603b3
4 changed files with 64 additions and 106 deletions
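The commit note above hinges on one assumption: that GCC actually honours the 16-byte alignment requested for the pipeline-state structs. The snippet below is not part of the commit; it is a hedged probe using stand-in types (StateSketch and BlockSketch are illustrative names, not PCSX2's) to test that assumption on a GCC build. If the probe reports failure, the memcmp fallback described in microVU_Misc.h is the safe choice.

// Hedged sketch, not PCSX2 code: probe whether GCC gives the requested
// 16-byte alignment that the generated SSE compare depends on.
#include <cstdio>
#include <cstdint>

// Stand-ins for microRegInfo/microBlock from microVU_IR.h (names are illustrative).
struct __attribute__((aligned(16))) StateSketch { uint8_t raw[160]; };

struct BlockSketch {
	StateSketch pState;     // read with movaps, so it must sit on a 16-byte boundary
	StateSketch pStateEnd;
	uint8_t*    x86ptrStart;
};

static BlockSketch g_block;  // file-scope object: alignment should be honoured here if anywhere

int main()
{
	bool ok = ((std::uintptr_t)&g_block.pState    % 16 == 0) &&
	          ((std::uintptr_t)&g_block.pStateEnd % 16 == 0);
	std::printf("16-byte alignment honoured: %s\n",
	            ok ? "yes" : "no -> use the memcmp fallback");
	return ok ? 0 : 1;
}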

View File

@@ -49,7 +49,8 @@ microVUf(void) mVUinit(VURegs* vuRegsPtr) {
 	mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
 	if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
+	mVUemitSearch();
 	mVUreset<vuIndex>();
 }
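mVUinit() now calls mVUemitSearch() once, before the first reset, so the block-search routine is generated into its buffer before any recompiled block needs it (the emitter file below also flips the page back to read-only after emitting). The POSIX sketch that follows is not PCSX2 code; it only illustrates the underlying pattern of emitting bytes into an executable buffer once at init and calling through a function pointer later, using a one-byte RET stub in place of the real SSE compare. The buffer and helper names are made up for the example.

// Hedged sketch of the "generate once at init, call many times" pattern
// (illustrative names; the real code uses the x86Emitter and HostSys::MemProtect).
#include <sys/mman.h>
#include <cstdint>
#include <cstdio>

static uint8_t* searchStub = nullptr;

static bool emitSearchStub()
{
	// One RWX page; the real routine is emitted the same way, just with
	// an SSE compare loop instead of a bare RET (0xC3).
	void* p = mmap(nullptr, 0x1000, PROT_READ | PROT_WRITE | PROT_EXEC,
	               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) return false;
	searchStub    = static_cast<uint8_t*>(p);
	searchStub[0] = 0xC3;   // ret
	return true;
}

int main()
{
	if (!emitSearchStub()) { std::puts("mmap failed"); return 1; }  // init-time, like mVUemitSearch()
	reinterpret_cast<void (*)()>(searchStub)();                     // later callers jump straight into the buffer
	std::puts("generated stub executed and returned");
}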

View File

@@ -32,6 +32,7 @@ union regInfo {
 #pragma pack(1)
 #pragma warning(disable:4996)
 #endif
 __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
 	u32 needExactMatch; // If set, block needs an exact match of pipeline state
 	u8 q;
@@ -44,6 +45,16 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
 	u8 padding[7]; // 160 bytes
 #if defined(_MSC_VER)
 };
 #else
 } __attribute__((packed));
 #endif
+__declspec(align(16)) struct microBlock {
+	microRegInfo pState; // Detailed State of Pipeline
+	microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
+	u8* x86ptrStart; // Start of code
+#if defined(_MSC_VER)
+};
+#pragma pack()
+#else
+} __attribute__((packed));
@@ -60,12 +71,6 @@ struct microTempRegInfo {
 	u8 xgkick; // Holds the cycle info for XGkick
 };
-__declspec(align(16)) struct microBlock {
-	microRegInfo pState; // Detailed State of Pipeline
-	microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
-	u8* x86ptrStart; // Start of code
-};
 struct microVFreg {
 	u8 reg; // Reg Index
 	u8 x; // X vector read/written to?
@@ -132,8 +137,8 @@ struct microOp {
 template<u32 pSize>
 struct microIR {
-	microBlock* pBlock; // Pointer to a block in mVUblocks
 	microBlock block; // Block/Pipeline info
+	microBlock* pBlock; // Pointer to a block in mVUblocks
 	microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
 	microOp info[pSize/2]; // Info for Instructions in current block
 	u8 branch;
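The #pragma pack / __attribute__((packed)) bracketing above keeps the pipeline-state structs at exactly 160 bytes, while the align(16) request keeps them on the boundary the SSE compare needs; moving microBlock inside the packed region and putting the block member first in microIR serves the same goal. The snippet below is not the committed header: it restates that packing-plus-alignment dance with a simplified stand-in (pipeStateSketch is an illustrative name and the field list is abbreviated), with the size/alignment assumptions written out as C++11 static_asserts.

// Hedged sketch: a packed, 16-byte-aligned, 160-byte struct in the same
// MSVC/GCC style as the diff above (the asserts are mine, not PCSX2's).
#include <stdint.h>

#if defined(_MSC_VER)
#pragma pack(1)
__declspec(align(16)) struct pipeStateSketch {
#else
struct pipeStateSketch {
#endif
	uint32_t needExactMatch;
	uint8_t  q;
	uint8_t  body[148];   // stand-in for the remaining pipeline fields
	uint8_t  padding[7];  // pads the total out to 160 bytes
#if defined(_MSC_VER)
};
#pragma pack()
#else
} __attribute__((packed, aligned(16)));
#endif

static_assert(sizeof(pipeStateSketch)  == 160, "compare routine reads exactly 160 bytes");
static_assert(alignof(pipeStateSketch) == 16,  "movaps requires 16-byte alignment");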

View File

@@ -161,16 +161,13 @@ declareAllVariables
 #define pass4 if (recPass == 3)
 // Define mVUquickSearch
-#if defined(_MSC_VER)
-extern u32 __fastcall mVUsearchXMM(void *dest, void *src);
-#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf)
-#else
-#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
-// Note: GCC might not guarantee alignment on microRegInfo,
-// so to be safe I'm using normal memcmp. If at least 8-byte
-// alignment is guaranteed, the function below is faster.
-// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
-#endif
+extern u8 mVUsearchXMM[0x1000];
+typedef u32 (__fastcall *mVUCall)(void*, void*);
+#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
+// Note: If GCC builds crash with above function, it means
+// that they're not guaranteeing 16-byte alignment on the structs
+// being compared. So use this function instead:
+// #define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
 // Misc Macros...
 #define mVUprogI mVU->prog.prog[progIndex]
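Both platforms now share one macro: the page-sized byte array filled by the emitter is cast to a __fastcall function pointer and the result is compared against 0xf (all four movmskps bits set). The sketch below is not the PCSX2 header; it restates the same shape in plain C++ with illustrative names (searchBuf, QuickSearchFn, USE_GENERATED_SEARCH are assumptions, not real identifiers) and without the x86-32 calling-convention attribute.

// Hedged sketch of the macro's shape: call generated code through a function
// pointer when the alignment can be trusted, otherwise fall back to memcmp.
#include <cstdint>
#include <cstring>

extern uint8_t searchBuf[0x1000];                 // filled at init by the emitter (illustrative name)
typedef uint32_t (*QuickSearchFn)(void*, void*);  // the real typedef also marks this __fastcall on x86-32

inline bool quickSearch(void* dest, void* src, std::size_t size)
{
#if defined(USE_GENERATED_SEARCH)                 // hypothetical switch, not a PCSX2 define
	// 0xf means every 32-bit lane compared equal in every 16-byte chunk.
	return (reinterpret_cast<QuickSearchFn>(searchBuf))(dest, src) == 0xf;
#else
	// Safe path when 16-byte alignment of the compared structs isn't guaranteed.
	return std::memcmp(dest, src, size) == 0;
#endif
}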

View File

@@ -438,100 +438,55 @@ void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
 // Micro VU - Custom Quick Search
 //------------------------------------------------------------------
-#ifndef __LINUX__
+PCSX2_ALIGNED(0x1000, static u8 mVUsearchXMM[0x1000]);
 // Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
-static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) {
+void mVUemitSearch() {
+	using namespace x86Emitter;
+	HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
+	memset_8<0xcc,0x1000>(mVUsearchXMM);
+	xSetPtr(mVUsearchXMM);
-	__asm {
-		movaps xmm0, [ecx]
-		pcmpeqd xmm0, [edx]
-		movaps xmm1, [ecx + 0x10]
-		pcmpeqd xmm1, [edx + 0x10]
-		pand xmm0, xmm1
+	xMOVAPS  (xmm0, ptr32[ecx]);
+	xPCMP.EQD(xmm0, ptr32[edx]);
+	xMOVAPS  (xmm1, ptr32[ecx + 0x10]);
+	xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
+	xPAND    (xmm0, xmm1);
-		movmskps eax, xmm0
-		cmp eax, 0xf
-		jl exitPoint
+	xMOVMSKPS(eax, xmm0);
+	xCMP     (eax, 0xf);
+	xForwardJL8 exitPoint;
-		movaps xmm0, [ecx + 0x20]
-		pcmpeqd xmm0, [edx + 0x20]
-		movaps xmm1, [ecx + 0x30]
-		pcmpeqd xmm1, [edx + 0x30]
-		pand xmm0, xmm1
+	xMOVAPS  (xmm0, ptr32[ecx + 0x20]);
+	xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
+	xMOVAPS  (xmm1, ptr32[ecx + 0x30]);
+	xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
+	xPAND    (xmm0, xmm1);
-		movaps xmm2, [ecx + 0x40]
-		pcmpeqd xmm2, [edx + 0x40]
-		movaps xmm3, [ecx + 0x50]
-		pcmpeqd xmm3, [edx + 0x50]
-		pand xmm2, xmm3
+	xMOVAPS  (xmm2, ptr32[ecx + 0x40]);
+	xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
+	xMOVAPS  (xmm3, ptr32[ecx + 0x50]);
+	xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
+	xPAND    (xmm2, xmm3);
-		movaps xmm4, [ecx + 0x60]
-		pcmpeqd xmm4, [edx + 0x60]
-		movaps xmm5, [ecx + 0x70]
-		pcmpeqd xmm5, [edx + 0x70]
-		pand xmm4, xmm5
+	xMOVAPS  (xmm4, ptr32[ecx + 0x60]);
+	xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
+	xMOVAPS  (xmm5, ptr32[ecx + 0x70]);
+	xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
+	xPAND    (xmm4, xmm5);
-		movaps xmm6, [ecx + 0x80]
-		pcmpeqd xmm6, [edx + 0x80]
-		movaps xmm7, [ecx + 0x90]
-		pcmpeqd xmm7, [edx + 0x90]
-		pand xmm6, xmm7
+	xMOVAPS  (xmm6, ptr32[ecx + 0x80]);
+	xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
+	xMOVAPS  (xmm7, ptr32[ecx + 0x90]);
+	xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
+	xPAND    (xmm6, xmm7);
-		pand xmm0, xmm2
-		pand xmm4, xmm6
-		pand xmm0, xmm4
-		movmskps eax, xmm0
-	exitPoint:
-		ret
-	}
+	xPAND    (xmm0, xmm2);
+	xPAND    (xmm4, xmm6);
+	xPAND    (xmm0, xmm4);
+	xMOVMSKPS(eax, xmm0);
+	exitPoint.SetTarget();
+	xRET();
+	HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true );
 }
-#else
-// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
-static u32 __fastcall mVUsearchXMM(void *dest, void *src)
-{
-	__asm__
-	(
-		".intel_syntax noprefix\n"
-		"movaps xmm0, [ecx]\n"
-		"pcmpeqd xmm0, [edx]\n"
-		"movaps xmm1, [ecx + 0x10]\n"
-		"pcmpeqd xmm1, [edx + 0x10]\n"
-		"pand xmm0, xmm1\n"
-		"movmskps eax, xmm0\n"
-		"cmp eax, 0xf\n"
-		"jl exitPoint\n"
-		"movaps xmm0, [ecx + 0x20]\n"
-		"pcmpeqd xmm0, [edx + 0x20]\n"
-		"movaps xmm1, [ecx + 0x30]\n"
-		"pcmpeqd xmm1, [edx + 0x30]\n"
-		"pand xmm0, xmm1\n"
-		"movaps xmm2, [ecx + 0x40]\n"
-		"pcmpeqd xmm2, [edx + 0x40]\n"
-		"movaps xmm3, [ecx + 0x50]\n"
-		"pcmpeqd xmm3, [edx + 0x50]\n"
-		"pand xmm2, xmm3\n"
-		"movaps xmm4, [ecx + 0x60]\n"
-		"pcmpeqd xmm4, [edx + 0x60]\n"
-		"movaps xmm5, [ecx + 0x70]\n"
-		"pcmpeqd xmm5, [edx + 0x70]\n"
-		"pand xmm4, xmm5\n"
-		"movaps xmm6, [ecx + 0x80]\n"
-		"pcmpeqd xmm6, [edx + 0x80]\n"
-		"movaps xmm7, [ecx + 0x90]\n"
-		"pcmpeqd xmm7, [edx + 0x90]\n"
-		"pand xmm6, xmm7\n"
-		"pand xmm0, xmm2\n"
-		"pand xmm4, xmm6\n"
-		"pand xmm0, xmm4\n"
-		"movmskps eax, xmm0\n"
-		"exitPoint:\n"
-		".att_syntax\n"
-	);
-}
-#endif
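For readers who would rather not trace the emitted assembly, the routine's logic is small: compare 160 bytes as ten 16-byte chunks with pcmpeqd, fold the results together with pand, read the verdict out of movmskps, and exit early after the first 32 bytes (the region that holds needExactMatch and the fields ordered first for faster compares). The function below is not the committed code and never goes through the emitter; it is a plain SSE2-intrinsics restatement of the same logic (quickCompare160 is an illustrative name), assuming both pointers are 16-byte aligned and the structs are exactly 160 bytes.

// Hedged sketch, not PCSX2 code: an SSE2-intrinsics restatement of what the
// generated routine does, handy for sanity-checking the logic above.
#include <emmintrin.h>   // SSE2: _mm_cmpeq_epi32, _mm_and_si128, _mm_castsi128_ps
#include <stdint.h>

// Assumes dest/src point at 16-byte-aligned, 160-byte structs (like microRegInfo).
static uint32_t quickCompare160(const void* dest, const void* src)
{
	const __m128i* a = static_cast<const __m128i*>(dest);
	const __m128i* b = static_cast<const __m128i*>(src);

	// Chunks 0-1 (offsets 0x00/0x10), with the same early exit as the emitted code.
	__m128i eq = _mm_and_si128(
		_mm_cmpeq_epi32(_mm_load_si128(a + 0), _mm_load_si128(b + 0)),
		_mm_cmpeq_epi32(_mm_load_si128(a + 1), _mm_load_si128(b + 1)));
	uint32_t mask = (uint32_t)_mm_movemask_ps(_mm_castsi128_ps(eq));
	if (mask != 0xf) return mask;   // mismatch found in the first 32 bytes

	// Chunks 2-9 (offsets 0x20-0x90), folded together like the pand chains above.
	eq = _mm_cmpeq_epi32(_mm_load_si128(a + 2), _mm_load_si128(b + 2));
	for (int i = 3; i < 10; i++)
		eq = _mm_and_si128(eq, _mm_cmpeq_epi32(_mm_load_si128(a + i), _mm_load_si128(b + i)));

	return (uint32_t)_mm_movemask_ps(_mm_castsi128_ps(eq));   // 0xf == all 160 bytes equal
}

// Usage mirrors mVUquickSearch: states match when the result is 0xf.
// bool same = (quickCompare160(&blockA->pState, &blockB->pState) == 0xf);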