mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Rewrote the custom compare function to use the emitter instead of inline asm. - Set Linux builds to use the function. Note: If this revision causes microVU to crash on Linux, it means GCC isn't guaranteeing 16-byte alignment on the microRegInfo and microBlock structs. In that case, mVUquickSearch will need to use the normal memcmp instead (see microVU_Misc.h). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1345 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
b61f7cc4d1
commit
2ca4a603b3
|
@ -50,6 +50,7 @@ microVUf(void) mVUinit(VURegs* vuRegsPtr) {
|
||||||
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
|
||||||
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
|
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
|
||||||
|
|
||||||
|
mVUemitSearch();
|
||||||
mVUreset<vuIndex>();
|
mVUreset<vuIndex>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,7 @@ union regInfo {
|
||||||
#pragma pack(1)
|
#pragma pack(1)
|
||||||
#pragma warning(disable:4996)
|
#pragma warning(disable:4996)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
__declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
||||||
u32 needExactMatch; // If set, block needs an exact match of pipeline state
|
u32 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||||
u8 q;
|
u8 q;
|
||||||
|
@ -44,6 +45,16 @@ __declspec(align(16)) struct microRegInfo { // Ordered for Faster Compares
|
||||||
u8 padding[7]; // 160 bytes
|
u8 padding[7]; // 160 bytes
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
};
|
};
|
||||||
|
#else
|
||||||
|
} __attribute__((packed));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__declspec(align(16)) struct microBlock {
|
||||||
|
microRegInfo pState; // Detailed State of Pipeline
|
||||||
|
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
||||||
|
u8* x86ptrStart; // Start of code
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
};
|
||||||
#pragma pack()
|
#pragma pack()
|
||||||
#else
|
#else
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
@ -60,12 +71,6 @@ struct microTempRegInfo {
|
||||||
u8 xgkick; // Holds the cycle info for XGkick
|
u8 xgkick; // Holds the cycle info for XGkick
|
||||||
};
|
};
|
||||||
|
|
||||||
__declspec(align(16)) struct microBlock {
|
|
||||||
microRegInfo pState; // Detailed State of Pipeline
|
|
||||||
microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes)
|
|
||||||
u8* x86ptrStart; // Start of code
|
|
||||||
};
|
|
||||||
|
|
||||||
struct microVFreg {
|
struct microVFreg {
|
||||||
u8 reg; // Reg Index
|
u8 reg; // Reg Index
|
||||||
u8 x; // X vector read/written to?
|
u8 x; // X vector read/written to?
|
||||||
|
@ -132,8 +137,8 @@ struct microOp {
|
||||||
|
|
||||||
template<u32 pSize>
|
template<u32 pSize>
|
||||||
struct microIR {
|
struct microIR {
|
||||||
microBlock* pBlock; // Pointer to a block in mVUblocks
|
|
||||||
microBlock block; // Block/Pipeline info
|
microBlock block; // Block/Pipeline info
|
||||||
|
microBlock* pBlock; // Pointer to a block in mVUblocks
|
||||||
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
|
||||||
microOp info[pSize/2]; // Info for Instructions in current block
|
microOp info[pSize/2]; // Info for Instructions in current block
|
||||||
u8 branch;
|
u8 branch;
|
||||||
|
|
|
@ -161,16 +161,13 @@ declareAllVariables
|
||||||
#define pass4 if (recPass == 3)
|
#define pass4 if (recPass == 3)
|
||||||
|
|
||||||
// Define mVUquickSearch
|
// Define mVUquickSearch
|
||||||
#if defined(_MSC_VER)
|
extern u8 mVUsearchXMM[0x1000];
|
||||||
extern u32 __fastcall mVUsearchXMM(void *dest, void *src);
|
typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
#define mVUquickSearch(dest, src, size) (mVUsearchXMM(dest, src) == 0xf)
|
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0xf)
|
||||||
#else
|
// Note: If GCC builds crash with above function, it means
|
||||||
#define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
|
// that they're not guaranteeing 16-byte alignment on the structs
|
||||||
// Note: GCC might not guarantee alignment on microRegInfo,
|
// being compared. So use this function instead:
|
||||||
// so to be safe I'm using normal memcmp. If at least 8-byte
|
// #define mVUquickSearch(dest, src, size) (!memcmp(dest, src, size))
|
||||||
// alignment is guaranteed, the function below is faster.
|
|
||||||
// #define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Misc Macros...
|
// Misc Macros...
|
||||||
#define mVUprogI mVU->prog.prog[progIndex]
|
#define mVUprogI mVU->prog.prog[progIndex]
|
||||||
|
|
|
@ -438,100 +438,55 @@ void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
||||||
// Micro VU - Custom Quick Search
|
// Micro VU - Custom Quick Search
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
#ifndef __LINUX__
|
PCSX2_ALIGNED(0x1000, static u8 mVUsearchXMM[0x1000]);
|
||||||
|
|
||||||
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
|
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
|
||||||
static __declspec(naked) u32 __fastcall mVUsearchXMM(void *dest, void *src) {
|
void mVUemitSearch() {
|
||||||
|
using namespace x86Emitter;
|
||||||
|
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
|
||||||
|
memset_8<0xcc,0x1000>(mVUsearchXMM);
|
||||||
|
xSetPtr(mVUsearchXMM);
|
||||||
|
|
||||||
__asm {
|
xMOVAPS (xmm0, ptr32[ecx]);
|
||||||
movaps xmm0, [ecx]
|
xPCMP.EQD(xmm0, ptr32[edx]);
|
||||||
pcmpeqd xmm0, [edx]
|
xMOVAPS (xmm1, ptr32[ecx + 0x10]);
|
||||||
movaps xmm1, [ecx + 0x10]
|
xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
|
||||||
pcmpeqd xmm1, [edx + 0x10]
|
xPAND (xmm0, xmm1);
|
||||||
pand xmm0, xmm1
|
|
||||||
|
|
||||||
movmskps eax, xmm0
|
xMOVMSKPS(eax, xmm0);
|
||||||
cmp eax, 0xf
|
xCMP (eax, 0xf);
|
||||||
jl exitPoint
|
xForwardJL8 exitPoint;
|
||||||
|
|
||||||
movaps xmm0, [ecx + 0x20]
|
xMOVAPS (xmm0, ptr32[ecx + 0x20]);
|
||||||
pcmpeqd xmm0, [edx + 0x20]
|
xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
|
||||||
movaps xmm1, [ecx + 0x30]
|
xMOVAPS (xmm1, ptr32[ecx + 0x30]);
|
||||||
pcmpeqd xmm1, [edx + 0x30]
|
xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
|
||||||
pand xmm0, xmm1
|
xPAND (xmm0, xmm1);
|
||||||
|
|
||||||
movaps xmm2, [ecx + 0x40]
|
xMOVAPS (xmm2, ptr32[ecx + 0x40]);
|
||||||
pcmpeqd xmm2, [edx + 0x40]
|
xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
|
||||||
movaps xmm3, [ecx + 0x50]
|
xMOVAPS (xmm3, ptr32[ecx + 0x50]);
|
||||||
pcmpeqd xmm3, [edx + 0x50]
|
xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
|
||||||
pand xmm2, xmm3
|
xPAND (xmm2, xmm3);
|
||||||
|
|
||||||
movaps xmm4, [ecx + 0x60]
|
xMOVAPS (xmm4, ptr32[ecx + 0x60]);
|
||||||
pcmpeqd xmm4, [edx + 0x60]
|
xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
|
||||||
movaps xmm5, [ecx + 0x70]
|
xMOVAPS (xmm5, ptr32[ecx + 0x70]);
|
||||||
pcmpeqd xmm5, [edx + 0x70]
|
xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
|
||||||
pand xmm4, xmm5
|
xPAND (xmm4, xmm5);
|
||||||
|
|
||||||
movaps xmm6, [ecx + 0x80]
|
xMOVAPS (xmm6, ptr32[ecx + 0x80]);
|
||||||
pcmpeqd xmm6, [edx + 0x80]
|
xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
|
||||||
movaps xmm7, [ecx + 0x90]
|
xMOVAPS (xmm7, ptr32[ecx + 0x90]);
|
||||||
pcmpeqd xmm7, [edx + 0x90]
|
xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
|
||||||
pand xmm6, xmm7
|
xPAND (xmm6, xmm7);
|
||||||
|
|
||||||
pand xmm0, xmm2
|
xPAND (xmm0, xmm2);
|
||||||
pand xmm4, xmm6
|
xPAND (xmm4, xmm6);
|
||||||
pand xmm0, xmm4
|
xPAND (xmm0, xmm4);
|
||||||
movmskps eax, xmm0
|
xMOVMSKPS(eax, xmm0);
|
||||||
exitPoint:
|
|
||||||
ret
|
exitPoint.SetTarget();
|
||||||
}
|
xRET();
|
||||||
|
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadOnly, true );
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
// Generates a custom optimized block-search function (Note: Structs must be 16-byte aligned!)
|
|
||||||
static u32 __fastcall mVUsearchXMM(void *dest, void *src)
|
|
||||||
{
|
|
||||||
__asm__
|
|
||||||
(
|
|
||||||
".intel_syntax noprefix\n"
|
|
||||||
"movaps xmm0, [ecx]\n"
|
|
||||||
"pcmpeqd xmm0, [edx]\n"
|
|
||||||
"movaps xmm1, [ecx + 0x10]\n"
|
|
||||||
"pcmpeqd xmm1, [edx + 0x10]\n"
|
|
||||||
"pand xmm0, xmm1\n"
|
|
||||||
|
|
||||||
"movmskps eax, xmm0\n"
|
|
||||||
"cmp eax, 0xf\n"
|
|
||||||
"jl exitPoint\n"
|
|
||||||
|
|
||||||
"movaps xmm0, [ecx + 0x20]\n"
|
|
||||||
"pcmpeqd xmm0, [edx + 0x20]\n"
|
|
||||||
"movaps xmm1, [ecx + 0x30]\n"
|
|
||||||
"pcmpeqd xmm1, [edx + 0x30]\n"
|
|
||||||
"pand xmm0, xmm1\n"
|
|
||||||
|
|
||||||
"movaps xmm2, [ecx + 0x40]\n"
|
|
||||||
"pcmpeqd xmm2, [edx + 0x40]\n"
|
|
||||||
"movaps xmm3, [ecx + 0x50]\n"
|
|
||||||
"pcmpeqd xmm3, [edx + 0x50]\n"
|
|
||||||
"pand xmm2, xmm3\n"
|
|
||||||
|
|
||||||
"movaps xmm4, [ecx + 0x60]\n"
|
|
||||||
"pcmpeqd xmm4, [edx + 0x60]\n"
|
|
||||||
"movaps xmm5, [ecx + 0x70]\n"
|
|
||||||
"pcmpeqd xmm5, [edx + 0x70]\n"
|
|
||||||
"pand xmm4, xmm5\n"
|
|
||||||
|
|
||||||
"movaps xmm6, [ecx + 0x80]\n"
|
|
||||||
"pcmpeqd xmm6, [edx + 0x80]\n"
|
|
||||||
"movaps xmm7, [ecx + 0x90]\n"
|
|
||||||
"pcmpeqd xmm7, [edx + 0x90]\n"
|
|
||||||
"pand xmm6, xmm7\n"
|
|
||||||
|
|
||||||
"pand xmm0, xmm2\n"
|
|
||||||
"pand xmm4, xmm6\n"
|
|
||||||
"pand xmm0, xmm4\n"
|
|
||||||
"movmskps eax, xmm0\n"
|
|
||||||
"exitPoint:\n"
|
|
||||||
".att_syntax\n"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
Loading…
Reference in New Issue