mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Rewrote the custom compare function to use the emitter instead of inline asm.
- Set Linux builds to use the function.

Note: If this revision causes microVU to crash on Linux, it means GCC isn't guaranteeing 16-byte alignment on the microRegInfo and microBlock structs, so those builds will need to use normal memcmp instead (see microVU_Misc.h).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1345 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
b61f7cc4d1
commit
2ca4a603b3
|
@ -49,7 +49,8 @@ microVUf(void) mVUinit(VURegs* vuRegsPtr) {
|
|||
|
||||
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
||||
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
|
||||
|
||||
|
||||
mVUemitSearch();
|
||||
mVUreset<vuIndex>();
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ union regInfo {
|
|||
#pragma pack(1)
|
||||
#pragma warning(disable:4996)
|
||||
#endif
|
||||
|
||||
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
||||
u32 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 q;
|
||||
|
@ -44,6 +45,16 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
|||
u8 padding[7]; // 160 bytes
|
||||
#if defined(_MSC_VER)
|
||||
};
|
||||
#else
|
||||
} __attribute__((packed));
|
||||
#endif
|
||||
|
||||
__declspec(align(16)) struct microBlock {
|
||||
microRegInfo pState; // Detailed State of Pipeline
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
u8* x86ptrStart; // Start of code
|
||||
#if defined(_MSC_VER)
|
||||
};
|
||||
#pragma pack()
|
||||
#else
|
||||
} __attribute__((packed));
|
||||
|
@ -60,12 +71,6 @@ struct microTempRegInfo {
|
|||
u8 xgkick; // Holds the cycle info for XGkick
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct microBlock {
|
||||
microRegInfo pState; // Detailed State of Pipeline
|
||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||
u8* x86ptrStart; // Start of code
|
||||
};
|
||||
|
||||
struct microVFreg {
|
||||
u8 reg; // Reg Index
|
||||
u8 x; // X vector read/written to?
|
||||
|
@ -132,8 +137,8 @@ struct microOp {
|
|||
|
||||
template<u32 pSize>
|
||||
struct microIR {
|
||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||
microBlock block; // Block/Pipeline info
|
||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||
microOp info[pSize/2]; // Info for Instructions in current block
|
||||
u8 branch;
|
||||
|
|
|
@ -161,16 +161,13 @@ declareAllVariables
|
|||
#define pass4 if (recPass == 3)
|
||||
|
||||
// Define mVUquickSearch
|
||||
#if defined(_MSC_VER)
|
||||
extern u32 __fastcall mVUsearchXMM(void *dest, void *src);
|
||||
#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf)
|
||||
#else
|
||||
#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
|
||||
// Note: GCC might not guarantee alignment on microRegInfo,
|
||||
// so to be safe I'm using normal memcmp. If at least 8-byte
|
||||
// alignment is guaranteed, the function below is faster.
|
||||
// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
|
||||
#endif
|
||||
extern u8 mVUsearchXMM[0x1000];
|
||||
typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
|
||||
// Note: If GCC builds crash with above function, it means
|
||||
// that they're not guaranteeing 16-byte alignment on the structs
|
||||
// being compared. So use this function instead:
|
||||
// #define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
|
||||
|
||||
// Misc Macros...
|
||||
#define mVUprogI mVU->prog.prog[progIndex]
|
||||
|
|
|
@ -438,100 +438,55 @@ void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|||
// Micro VU - Custom Quick Search
|
||||
//------------------------------------------------------------------
|
||||
|
||||
#ifndef __LINUX__
|
||||
PCSX2_ALIGNED(0x1000, static u8 mVUsearchXMM[0x1000]);
|
||||
|
||||
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
|
||||
static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) {
|
||||
void mVUemitSearch() {
|
||||
using namespace x86Emitter;
|
||||
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
|
||||
memset_8<0xcc,0x1000>(mVUsearchXMM);
|
||||
xSetPtr(mVUsearchXMM);
|
||||
|
||||
__asm {
|
||||
movaps xmm0, [ecx]
|
||||
pcmpeqd xmm0, [edx]
|
||||
movaps xmm1, [ecx + 0x10]
|
||||
pcmpeqd xmm1, [edx + 0x10]
|
||||
pand xmm0, xmm1
|
||||
xMOVAPS (xmm0, ptr32[ecx]);
|
||||
xPCMP.EQD(xmm0, ptr32[edx]);
|
||||
xMOVAPS (xmm1, ptr32[ecx + 0x10]);
|
||||
xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
|
||||
xPAND (xmm0, xmm1);
|
||||
|
||||
movmskps eax, xmm0
|
||||
cmp eax, 0xf
|
||||
jl exitPoint
|
||||
xMOVMSKPS(eax, xmm0);
|
||||
xCMP (eax, 0xf);
|
||||
xForwardJL8 exitPoint;
|
||||
|
||||
movaps xmm0, [ecx + 0x20]
|
||||
pcmpeqd xmm0, [edx + 0x20]
|
||||
movaps xmm1, [ecx + 0x30]
|
||||
pcmpeqd xmm1, [edx + 0x30]
|
||||
pand xmm0, xmm1
|
||||
xMOVAPS (xmm0, ptr32[ecx + 0x20]);
|
||||
xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
|
||||
xMOVAPS (xmm1, ptr32[ecx + 0x30]);
|
||||
xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
|
||||
xPAND (xmm0, xmm1);
|
||||
|
||||
movaps xmm2, [ecx + 0x40]
|
||||
pcmpeqd xmm2, [edx + 0x40]
|
||||
movaps xmm3, [ecx + 0x50]
|
||||
pcmpeqd xmm3, [edx + 0x50]
|
||||
pand xmm2, xmm3
|
||||
xMOVAPS (xmm2, ptr32[ecx + 0x40]);
|
||||
xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
|
||||
xMOVAPS (xmm3, ptr32[ecx + 0x50]);
|
||||
xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
|
||||
xPAND (xmm2, xmm3);
|
||||
|
||||
movaps xmm4, [ecx + 0x60]
|
||||
pcmpeqd xmm4, [edx + 0x60]
|
||||
movaps xmm5, [ecx + 0x70]
|
||||
pcmpeqd xmm5, [edx + 0x70]
|
||||
pand xmm4, xmm5
|
||||
xMOVAPS (xmm4, ptr32[ecx + 0x60]);
|
||||
xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
|
||||
xMOVAPS (xmm5, ptr32[ecx + 0x70]);
|
||||
xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
|
||||
xPAND (xmm4, xmm5);
|
||||
|
||||
movaps xmm6, [ecx + 0x80]
|
||||
pcmpeqd xmm6, [edx + 0x80]
|
||||
movaps xmm7, [ecx + 0x90]
|
||||
pcmpeqd xmm7, [edx + 0x90]
|
||||
pand xmm6, xmm7
|
||||
xMOVAPS (xmm6, ptr32[ecx + 0x80]);
|
||||
xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
|
||||
xMOVAPS (xmm7, ptr32[ecx + 0x90]);
|
||||
xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
|
||||
xPAND (xmm6, xmm7);
|
||||
|
||||
pand xmm0, xmm2
|
||||
pand xmm4, xmm6
|
||||
pand xmm0, xmm4
|
||||
movmskps eax, xmm0
|
||||
exitPoint:
|
||||
ret
|
||||
}
|
||||
xPAND (xmm0, xmm2);
|
||||
xPAND (xmm4, xmm6);
|
||||
xPAND (xmm0, xmm4);
|
||||
xMOVMSKPS(eax, xmm0);
|
||||
|
||||
exitPoint.SetTarget();
|
||||
xRET();
|
||||
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true );
|
||||
}
|
||||
#else
|
||||
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
|
||||
static u32 __fastcall mVUsearchXMM(void *dest, void *src)
|
||||
{
|
||||
__asm__
|
||||
(
|
||||
".intel_syntax noprefix\n"
|
||||
"movaps xmm0, [ecx]\n"
|
||||
"pcmpeqd xmm0, [edx]\n"
|
||||
"movaps xmm1, [ecx + 0x10]\n"
|
||||
"pcmpeqd xmm1, [edx + 0x10]\n"
|
||||
"pand xmm0, xmm1\n"
|
||||
|
||||
"movmskps eax, xmm0\n"
|
||||
"cmp eax, 0xf\n"
|
||||
"jl exitPoint\n"
|
||||
|
||||
"movaps xmm0, [ecx + 0x20]\n"
|
||||
"pcmpeqd xmm0, [edx + 0x20]\n"
|
||||
"movaps xmm1, [ecx + 0x30]\n"
|
||||
"pcmpeqd xmm1, [edx + 0x30]\n"
|
||||
"pand xmm0, xmm1\n"
|
||||
|
||||
"movaps xmm2, [ecx + 0x40]\n"
|
||||
"pcmpeqd xmm2, [edx + 0x40]\n"
|
||||
"movaps xmm3, [ecx + 0x50]\n"
|
||||
"pcmpeqd xmm3, [edx + 0x50]\n"
|
||||
"pand xmm2, xmm3\n"
|
||||
|
||||
"movaps xmm4, [ecx + 0x60]\n"
|
||||
"pcmpeqd xmm4, [edx + 0x60]\n"
|
||||
"movaps xmm5, [ecx + 0x70]\n"
|
||||
"pcmpeqd xmm5, [edx + 0x70]\n"
|
||||
"pand xmm4, xmm5\n"
|
||||
|
||||
"movaps xmm6, [ecx + 0x80]\n"
|
||||
"pcmpeqd xmm6, [edx + 0x80]\n"
|
||||
"movaps xmm7, [ecx + 0x90]\n"
|
||||
"pcmpeqd xmm7, [edx + 0x90]\n"
|
||||
"pand xmm6, xmm7\n"
|
||||
|
||||
"pand xmm0, xmm2\n"
|
||||
"pand xmm4, xmm6\n"
|
||||
"pand xmm0, xmm4\n"
|
||||
"movmskps eax, xmm0\n"
|
||||
"exitPoint:\n"
|
||||
".att_syntax\n"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue