mirror of https://github.com/PCSX2/pcsx2.git
EE:Rec: Allow rec memory anywhere
This commit is contained in:
parent
8a9fbb43e6
commit
fe2f97eeb5
|
@ -49,6 +49,7 @@
|
||||||
|
|
||||||
|
|
||||||
thread_local u8* x86Ptr;
|
thread_local u8* x86Ptr;
|
||||||
|
thread_local u8* xTextPtr;
|
||||||
thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};
|
thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};
|
||||||
|
|
||||||
namespace x86Emitter
|
namespace x86Emitter
|
||||||
|
@ -295,13 +296,27 @@ const xRegister32
|
||||||
void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset)
|
void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset)
|
||||||
{
|
{
|
||||||
sptr displacement = (sptr)address;
|
sptr displacement = (sptr)address;
|
||||||
|
sptr textRelative = (sptr)address - (sptr)xTextPtr;
|
||||||
sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset);
|
sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset);
|
||||||
|
// Can we use an 8-bit offset from the text pointer?
|
||||||
|
if (textRelative == (s8)textRelative && xTextPtr)
|
||||||
|
{
|
||||||
|
ModRM(1, regfield, RTEXTPTR.GetId());
|
||||||
|
xWrite<s8>((s8)textRelative);
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Can we use a rip-relative address? (Prefer this over eiz because it's a byte shorter)
|
// Can we use a rip-relative address? (Prefer this over eiz because it's a byte shorter)
|
||||||
if (ripRelative == (s32)ripRelative)
|
else if (ripRelative == (s32)ripRelative)
|
||||||
{
|
{
|
||||||
ModRM(0, regfield, ModRm_UseDisp32);
|
ModRM(0, regfield, ModRm_UseDisp32);
|
||||||
displacement = ripRelative;
|
displacement = ripRelative;
|
||||||
}
|
}
|
||||||
|
// How about from the text pointer?
|
||||||
|
else if (textRelative == (s32)textRelative && xTextPtr)
|
||||||
|
{
|
||||||
|
ModRM(2, regfield, RTEXTPTR.GetId());
|
||||||
|
displacement = textRelative;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pxAssertMsg(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register");
|
pxAssertMsg(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register");
|
||||||
|
@ -539,6 +554,12 @@ const xRegister32
|
||||||
x86Ptr = (u8*)ptr;
|
x86Ptr = (u8*)ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Assigns the current emitter text base address.
// The value is stored in the thread_local xTextPtr. EmitSibMagic and the other
// emitter helpers only produce RTEXTPTR-relative operands while this pointer is
// non-null, so passing nullptr turns that addressing form off for code emitted
// afterwards. This call only records the address; it does not itself emit the
// instruction that loads RTEXTPTR.
|
||||||
|
__emitinline void xSetTextPtr(void* ptr)
|
||||||
|
{
|
||||||
|
xTextPtr = (u8*)ptr;
|
||||||
|
}
|
||||||
|
|
||||||
// Retrieves the current emitter buffer target address.
|
// Retrieves the current emitter buffer target address.
|
||||||
// This is provided instead of using x86Ptr directly, since we may in the future find
|
// This is provided instead of using x86Ptr directly, since we may in the future find
|
||||||
// a need to change the storage class system for the x86Ptr 'under the hood.'
|
// a need to change the storage class system for the x86Ptr 'under the hood.'
|
||||||
|
@ -547,6 +568,12 @@ const xRegister32
|
||||||
return x86Ptr;
|
return x86Ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Retrieves the current emitter text base address.
// Returns the thread_local xTextPtr as last stored by xSetTextPtr. The result may
// be null; callers in this file test it before emitting RTEXTPTR-relative code
// or before loading RTEXTPTR with it.
|
||||||
|
__emitinline u8* xGetTextPtr()
|
||||||
|
{
|
||||||
|
return xTextPtr;
|
||||||
|
}
|
||||||
|
|
||||||
__emitinline void xAlignPtr(uint bytes)
|
__emitinline void xAlignPtr(uint bytes)
|
||||||
{
|
{
|
||||||
// forward align
|
// forward align
|
||||||
|
@ -1229,6 +1256,9 @@ const xRegister32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
stackAlign(m_offset, true);
|
stackAlign(m_offset, true);
|
||||||
|
|
||||||
|
if (u8* ptr = xGetTextPtr())
|
||||||
|
xMOV64(RTEXTPTR, (sptr)ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
xScopedStackFrame::~xScopedStackFrame()
|
xScopedStackFrame::~xScopedStackFrame()
|
||||||
|
@ -1285,11 +1315,14 @@ const xRegister32
|
||||||
{
|
{
|
||||||
return offset + base;
|
return offset + base;
|
||||||
}
|
}
|
||||||
else
|
if (u8* ptr = xGetTextPtr())
|
||||||
{
|
{
|
||||||
xLEA(tmpRegister, ptr[base]);
|
sptr tbase = (sptr)base - (sptr)ptr;
|
||||||
return offset + tmpRegister;
|
if (tbase == (s32)tbase)
|
||||||
|
return offset + RTEXTPTR + tbase;
|
||||||
}
|
}
|
||||||
|
xLEA(tmpRegister, ptr[base]);
|
||||||
|
return offset + tmpRegister;
|
||||||
}
|
}
|
||||||
|
|
||||||
void xLoadFarAddr(const xAddressReg& dst, void* addr)
|
void xLoadFarAddr(const xAddressReg& dst, void* addr)
|
||||||
|
|
|
@ -149,11 +149,13 @@ namespace x86Emitter
|
||||||
static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field)
|
static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field)
|
||||||
|
|
||||||
extern void xSetPtr(void* ptr);
|
extern void xSetPtr(void* ptr);
|
||||||
|
extern void xSetTextPtr(void* ptr);
|
||||||
extern void xAlignPtr(uint bytes);
|
extern void xAlignPtr(uint bytes);
|
||||||
extern void xAdvancePtr(uint bytes);
|
extern void xAdvancePtr(uint bytes);
|
||||||
extern void xAlignCallTarget();
|
extern void xAlignCallTarget();
|
||||||
|
|
||||||
extern u8* xGetPtr();
|
extern u8* xGetPtr();
|
||||||
|
extern u8* xGetTextPtr();
|
||||||
extern u8* xGetAlignedCallTarget();
|
extern u8* xGetAlignedCallTarget();
|
||||||
|
|
||||||
extern JccComparisonType xInvertCond(JccComparisonType src);
|
extern JccComparisonType xInvertCond(JccComparisonType src);
|
||||||
|
@ -646,6 +648,8 @@ extern const xRegister32
|
||||||
calleeSavedReg1d,
|
calleeSavedReg1d,
|
||||||
calleeSavedReg2d;
|
calleeSavedReg2d;
|
||||||
|
|
||||||
|
/// Holds a pointer to program text at all times so we don't need to be within 2GB of text
|
||||||
|
static constexpr const xAddressReg& RTEXTPTR = rbx;
|
||||||
|
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
|
|
|
@ -890,10 +890,13 @@ static void recReserve()
|
||||||
pxFailRel("Failed to allocate R3000 InstCache array.");
|
pxFailRel("Failed to allocate R3000 InstCache array.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define R3000A_TEXTPTR (&psxRegs.GPR.r[33])
|
||||||
|
|
||||||
void recResetIOP()
|
void recResetIOP()
|
||||||
{
|
{
|
||||||
DevCon.WriteLn("iR3000A Recompiler reset.");
|
DevCon.WriteLn("iR3000A Recompiler reset.");
|
||||||
|
|
||||||
|
xSetTextPtr(R3000A_TEXTPTR);
|
||||||
xSetPtr(SysMemory::GetIOPRec());
|
xSetPtr(SysMemory::GetIOPRec());
|
||||||
_DynGen_Dispatchers();
|
_DynGen_Dispatchers();
|
||||||
recPtr = xGetPtr();
|
recPtr = xGetPtr();
|
||||||
|
@ -1565,6 +1568,7 @@ static void iopRecRecompile(const u32 startpc)
|
||||||
recResetIOP();
|
recResetIOP();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xSetTextPtr(R3000A_TEXTPTR);
|
||||||
xSetPtr(recPtr);
|
xSetPtr(recPtr);
|
||||||
recPtr = xGetAlignedCallTarget();
|
recPtr = xGetAlignedCallTarget();
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,11 @@ extern u32 target; // branch target
|
||||||
extern u32 s_nBlockCycles; // cycles of current block recompiling
|
extern u32 s_nBlockCycles; // cycles of current block recompiling
|
||||||
extern bool s_nBlockInterlocked; // Current block has VU0 interlocking
|
extern bool s_nBlockInterlocked; // Current block has VU0 interlocking
|
||||||
|
|
||||||
|
// x86 can use shorter displacement if it fits in an s8, so offset 144 bytes into the cpuRegs
|
||||||
|
// This will allow us to reach r1-r16 with a shorter encoding
|
||||||
|
// TODO: Actually figure out what things are used most often, maybe rearrange the cpuRegs struct, and point at that
|
||||||
|
#define R5900_TEXTPTR (&cpuRegs.GPR.r[9])
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
|
@ -445,6 +445,8 @@ static const void* _DynGen_EnterRecompiledCode()
|
||||||
xSUB(rsp, stack_size);
|
xSUB(rsp, stack_size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (u8* ptr = xGetTextPtr())
|
||||||
|
xMOV64(RTEXTPTR, (sptr)ptr);
|
||||||
if (CHECK_FASTMEM)
|
if (CHECK_FASTMEM)
|
||||||
xMOV(RFASTMEMBASE, ptrNative[&vtlb_private::vtlbdata.fastmem_base]);
|
xMOV(RFASTMEMBASE, ptrNative[&vtlb_private::vtlbdata.fastmem_base]);
|
||||||
|
|
||||||
|
@ -585,6 +587,7 @@ static void recResetRaw()
|
||||||
|
|
||||||
EE::Profiler.Reset();
|
EE::Profiler.Reset();
|
||||||
|
|
||||||
|
xSetTextPtr(R5900_TEXTPTR);
|
||||||
xSetPtr(SysMemory::GetEERec());
|
xSetPtr(SysMemory::GetEERec());
|
||||||
_DynGen_Dispatchers();
|
_DynGen_Dispatchers();
|
||||||
vtlb_DynGenDispatchers();
|
vtlb_DynGenDispatchers();
|
||||||
|
@ -897,6 +900,7 @@ u8* recBeginThunk()
|
||||||
if (recPtr >= recPtrEnd)
|
if (recPtr >= recPtrEnd)
|
||||||
eeRecNeedsReset = true;
|
eeRecNeedsReset = true;
|
||||||
|
|
||||||
|
xSetTextPtr(R5900_TEXTPTR);
|
||||||
xSetPtr(recPtr);
|
xSetPtr(recPtr);
|
||||||
recPtr = xGetAlignedCallTarget();
|
recPtr = xGetAlignedCallTarget();
|
||||||
|
|
||||||
|
@ -2191,6 +2195,7 @@ static void recRecompile(const u32 startpc)
|
||||||
recResetRaw();
|
recResetRaw();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xSetTextPtr(R5900_TEXTPTR);
|
||||||
xSetPtr(recPtr);
|
xSetPtr(recPtr);
|
||||||
recPtr = xGetAlignedCallTarget();
|
recPtr = xGetAlignedCallTarget();
|
||||||
|
|
||||||
|
|
|
@ -345,6 +345,7 @@ void vtlb_DynGenDispatchers()
|
||||||
for (int sign = 0; sign < (!mode && bits < 3 ? 2 : 1); sign++)
|
for (int sign = 0; sign < (!mode && bits < 3 ? 2 : 1); sign++)
|
||||||
{
|
{
|
||||||
xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign));
|
xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign));
|
||||||
|
xSetTextPtr(R5900_TEXTPTR);
|
||||||
|
|
||||||
DynGen_IndirectTlbDispatcher(mode, bits, !!sign);
|
DynGen_IndirectTlbDispatcher(mode, bits, !!sign);
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
|
||||||
VU0.VI[REG_VPU_STAT].UL &= ~0x100;
|
VU0.VI[REG_VPU_STAT].UL &= ~0x100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xSetTextPtr(mVU.textPtr());
|
||||||
xSetPtr(mVU.cache);
|
xSetPtr(mVU.cache);
|
||||||
mVUdispatcherAB(mVU);
|
mVUdispatcherAB(mVU);
|
||||||
mVUdispatcherCD(mVU);
|
mVUdispatcherCD(mVU);
|
||||||
|
|
|
@ -123,6 +123,7 @@ struct microVU
|
||||||
s32 cycles; // Cycles Counter
|
s32 cycles; // Cycles Counter
|
||||||
|
|
||||||
VURegs& regs() const { return ::vuRegs[index]; }
|
VURegs& regs() const { return ::vuRegs[index]; }
|
||||||
|
void* textPtr() const { return (index && THREAD_VU1) ? (void*)®s().VF[9] : (void*)R5900_TEXTPTR; }
|
||||||
|
|
||||||
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
|
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
|
||||||
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
|
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
|
||||||
|
|
|
@ -207,15 +207,17 @@ static void mVUGenerateCopyPipelineState(mV)
|
||||||
{
|
{
|
||||||
mVU.copyPLState = xGetAlignedCallTarget();
|
mVU.copyPLState = xGetAlignedCallTarget();
|
||||||
|
|
||||||
|
xLoadFarAddr(rdx, reinterpret_cast<u8*>(&mVU.prog.lpState));
|
||||||
|
|
||||||
if (cpuinfo_has_x86_avx())
|
if (cpuinfo_has_x86_avx())
|
||||||
{
|
{
|
||||||
xVMOVAPS(ymm0, ptr[rax]);
|
xVMOVAPS(ymm0, ptr[rax]);
|
||||||
xVMOVAPS(ymm1, ptr[rax + 32u]);
|
xVMOVAPS(ymm1, ptr[rax + 32u]);
|
||||||
xVMOVAPS(ymm2, ptr[rax + 64u]);
|
xVMOVAPS(ymm2, ptr[rax + 64u]);
|
||||||
|
|
||||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], ymm0);
|
xVMOVUPS(ptr[rdx], ymm0);
|
||||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], ymm1);
|
xVMOVUPS(ptr[rdx + 32u], ymm1);
|
||||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], ymm2);
|
xVMOVUPS(ptr[rdx + 64u], ymm2);
|
||||||
|
|
||||||
xVZEROUPPER();
|
xVZEROUPPER();
|
||||||
}
|
}
|
||||||
|
@ -228,12 +230,12 @@ static void mVUGenerateCopyPipelineState(mV)
|
||||||
xMOVAPS(xmm4, ptr[rax + 64u]);
|
xMOVAPS(xmm4, ptr[rax + 64u]);
|
||||||
xMOVAPS(xmm5, ptr[rax + 80u]);
|
xMOVAPS(xmm5, ptr[rax + 80u]);
|
||||||
|
|
||||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], xmm0);
|
xMOVUPS(ptr[rdx], xmm0);
|
||||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 16u], xmm1);
|
xMOVUPS(ptr[rdx + 16u], xmm1);
|
||||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], xmm2);
|
xMOVUPS(ptr[rdx + 32u], xmm2);
|
||||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 48u], xmm3);
|
xMOVUPS(ptr[rdx + 48u], xmm3);
|
||||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], xmm4);
|
xMOVUPS(ptr[rdx + 64u], xmm4);
|
||||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 80u], xmm5);
|
xMOVUPS(ptr[rdx + 80u], xmm5);
|
||||||
}
|
}
|
||||||
|
|
||||||
xRET();
|
xRET();
|
||||||
|
@ -326,6 +328,7 @@ _mVUt void* mVUexecute(u32 startPC, u32 cycles)
|
||||||
mVU.cycles = cycles;
|
mVU.cycles = cycles;
|
||||||
mVU.totalCycles = cycles;
|
mVU.totalCycles = cycles;
|
||||||
|
|
||||||
|
xSetTextPtr(mVU.textPtr());
|
||||||
xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off
|
xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off
|
||||||
return mVUsearchProg<vuIndex>(startPC & vuLimit, (uptr)&mVU.prog.lpState); // Find and set correct program
|
return mVUsearchProg<vuIndex>(startPC & vuLimit, (uptr)&mVU.prog.lpState); // Find and set correct program
|
||||||
}
|
}
|
||||||
|
|
|
@ -411,6 +411,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gprMap[RTEXTPTR.GetId()].usable = !xGetTextPtr();
|
||||||
gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
|
gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1106,7 +1106,7 @@ mVUop(mVU_ILW)
|
||||||
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT1, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
}
|
}
|
||||||
|
|
||||||
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
|
@ -1133,7 +1133,7 @@ mVUop(mVU_ILWR)
|
||||||
if (_Is_)
|
if (_Is_)
|
||||||
{
|
{
|
||||||
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
mVUaddrFix (mVU, gprT1q);
|
mVUaddrFix (mVU, gprT1q, gprT2q);
|
||||||
|
|
||||||
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
|
||||||
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
|
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
|
||||||
|
@ -1170,7 +1170,7 @@ mVUop(mVU_ISW)
|
||||||
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT1, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If regT is dirty, the high bits might not be zero.
|
// If regT is dirty, the high bits might not be zero.
|
||||||
|
@ -1201,7 +1201,7 @@ mVUop(mVU_ISWR)
|
||||||
if (_Is_)
|
if (_Is_)
|
||||||
{
|
{
|
||||||
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
is = gprT1q;
|
is = gprT1q;
|
||||||
}
|
}
|
||||||
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
|
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
|
||||||
|
@ -1257,7 +1257,7 @@ mVUop(mVU_LQ)
|
||||||
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT1, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
}
|
}
|
||||||
|
|
||||||
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
|
||||||
|
@ -1281,7 +1281,7 @@ mVUop(mVU_LQD)
|
||||||
xDEC(regS);
|
xDEC(regS);
|
||||||
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
|
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
|
||||||
mVU.regAlloc->clearNeeded(regS);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
is = gprT1q;
|
is = gprT1q;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1319,7 +1319,7 @@ mVUop(mVU_LQI)
|
||||||
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
|
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
|
||||||
xINC(regS);
|
xINC(regS);
|
||||||
mVU.regAlloc->clearNeeded(regS);
|
mVU.regAlloc->clearNeeded(regS);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
is = gprT1q;
|
is = gprT1q;
|
||||||
}
|
}
|
||||||
if (!mVUlow.noWriteVF)
|
if (!mVUlow.noWriteVF)
|
||||||
|
@ -1351,7 +1351,7 @@ mVUop(mVU_SQ)
|
||||||
mVU.regAlloc->moveVIToGPR(gprT1, _It_);
|
mVU.regAlloc->moveVIToGPR(gprT1, _It_);
|
||||||
if (_Imm11_ != 0)
|
if (_Imm11_ != 0)
|
||||||
xADD(gprT1, _Imm11_);
|
xADD(gprT1, _Imm11_);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
}
|
}
|
||||||
|
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
|
||||||
|
@ -1375,7 +1375,7 @@ mVUop(mVU_SQD)
|
||||||
xDEC(regT);
|
xDEC(regT);
|
||||||
xMOVZX(gprT1, xRegister16(regT));
|
xMOVZX(gprT1, xRegister16(regT));
|
||||||
mVU.regAlloc->clearNeeded(regT);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
it = gprT1q;
|
it = gprT1q;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1405,7 +1405,7 @@ mVUop(mVU_SQI)
|
||||||
xMOVZX(gprT1, xRegister16(regT));
|
xMOVZX(gprT1, xRegister16(regT));
|
||||||
xINC(regT);
|
xINC(regT);
|
||||||
mVU.regAlloc->clearNeeded(regT);
|
mVU.regAlloc->clearNeeded(regT);
|
||||||
mVUaddrFix(mVU, gprT1q);
|
mVUaddrFix(mVU, gprT1q, gprT2q);
|
||||||
}
|
}
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
|
||||||
if (_It_)
|
if (_It_)
|
||||||
|
|
|
@ -295,7 +295,7 @@ static void mVUwaitMTVU()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
||||||
__fi void mVUaddrFix(mV, const xAddressReg& gprReg)
|
__fi void mVUaddrFix(mV, const xAddressReg& gprReg, const xAddressReg& tmpReg)
|
||||||
{
|
{
|
||||||
if (isVU1)
|
if (isVU1)
|
||||||
{
|
{
|
||||||
|
@ -324,7 +324,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
|
||||||
xFastCall((void*)mVU.waitMTVU);
|
xFastCall((void*)mVU.waitMTVU);
|
||||||
}
|
}
|
||||||
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
|
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
|
||||||
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
|
sptr offset = (u128*)VU1.VF - (u128*)VU0.Mem;
|
||||||
|
if (offset == (s32)offset)
|
||||||
|
{
|
||||||
|
xADD(gprReg, offset);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
xMOV64(tmpReg, offset);
|
||||||
|
xADD(gprReg, tmpReg);
|
||||||
|
}
|
||||||
jmpB.SetTarget();
|
jmpB.SetTarget();
|
||||||
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
|
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,8 @@ void dVifRelease(int idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
|
VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
|
||||||
: v(vif_)
|
: vifPtr(rax)
|
||||||
|
, v(vif_)
|
||||||
, vB(vifBlock_)
|
, vB(vifBlock_)
|
||||||
{
|
{
|
||||||
const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2)
|
const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2)
|
||||||
|
@ -42,9 +43,6 @@ __fi void makeMergeMask(u32& x)
|
||||||
|
|
||||||
__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
|
__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
|
||||||
{
|
{
|
||||||
const int idx = v.idx;
|
|
||||||
const vifStruct& vif = MTVU_VifX;
|
|
||||||
|
|
||||||
//This could have ended up copying the row when there was no row to write.1810080
|
//This could have ended up copying the row when there was no row to write.1810080
|
||||||
u32 m0 = vB.mask; //The actual mask example 0x03020100
|
u32 m0 = vB.mask; //The actual mask example 0x03020100
|
||||||
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
|
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
|
||||||
|
@ -52,14 +50,14 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
|
||||||
|
|
||||||
if ((doMask && m2) || doMode)
|
if ((doMask && m2) || doMode)
|
||||||
{
|
{
|
||||||
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
|
xMOVAPS(xmmRow, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)]);
|
||||||
MSKPATH3_LOG("Moving row");
|
MSKPATH3_LOG("Moving row");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doMask && m3)
|
if (doMask && m3)
|
||||||
{
|
{
|
||||||
VIF_LOG("Merging Cols");
|
VIF_LOG("Merging Cols");
|
||||||
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
|
xMOVAPS(xmmCol0, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskCol)]);
|
||||||
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
|
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
|
||||||
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
|
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
|
||||||
if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
|
if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
|
||||||
|
@ -137,8 +135,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
|
||||||
|
|
||||||
void VifUnpackSSE_Dynarec::writeBackRow() const
|
void VifUnpackSSE_Dynarec::writeBackRow() const
|
||||||
{
|
{
|
||||||
const int idx = v.idx;
|
xMOVAPS(ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)], xmmRow);
|
||||||
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
|
|
||||||
|
|
||||||
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
|
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
|
||||||
}
|
}
|
||||||
|
@ -239,6 +236,7 @@ void VifUnpackSSE_Dynarec::ProcessMasks()
|
||||||
|
|
||||||
void VifUnpackSSE_Dynarec::CompileRoutine()
|
void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
{
|
{
|
||||||
|
const int idx = v.idx;
|
||||||
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
|
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
|
||||||
const int upkNum = vB.upkType & 0xf;
|
const int upkNum = vB.upkType & 0xf;
|
||||||
const u8& vift = nVifT[upkNum];
|
const u8& vift = nVifT[upkNum];
|
||||||
|
@ -252,6 +250,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
|
||||||
VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
|
||||||
|
|
||||||
pxAssume(vCL == 0);
|
pxAssume(vCL == 0);
|
||||||
|
xLoadFarAddr(vifPtr, &MTVU_VifX);
|
||||||
|
|
||||||
// Value passed determines # of col regs we need to load
|
// Value passed determines # of col regs we need to load
|
||||||
SetMasks(isFill ? blockSize : cycleSize);
|
SetMasks(isFill ? blockSize : cycleSize);
|
||||||
|
@ -336,6 +335,7 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compile the block now
|
// Compile the block now
|
||||||
|
xSetTextPtr(nullptr);
|
||||||
xSetPtr(v.recWritePtr);
|
xSetPtr(v.recWritePtr);
|
||||||
|
|
||||||
block.startPtr = (uptr)xGetAlignedCallTarget();
|
block.startPtr = (uptr)xGetAlignedCallTarget();
|
||||||
|
|
|
@ -329,9 +329,11 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const
|
||||||
{
|
{
|
||||||
xMOVAPS(xmm7, ptr[dstIndirect]);
|
xMOVAPS(xmm7, ptr[dstIndirect]);
|
||||||
int offX = std::min(curCycle, 3);
|
int offX = std::min(curCycle, 3);
|
||||||
xPAND(regX, ptr32[nVifMask[0][offX]]);
|
sptr base = reinterpret_cast<sptr>(nVifMask[2]);
|
||||||
xPAND(xmm7, ptr32[nVifMask[1][offX]]);
|
// rax must hold the same anchor that `base` was taken from (nVifMask[2]): the
// displacements below are computed as (row address - base), so loading the start
// of nVifMask here would shift every access two sub-arrays too low.
xLoadFarAddr(rax, nVifMask[2]);
|
||||||
xPOR (regX, ptr32[nVifMask[2][offX]]);
|
xPAND(regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[0][offX]) - base)]);
|
||||||
|
xPAND(xmm7, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[1][offX]) - base)]);
|
||||||
|
xPOR (regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[2][offX]) - base)]);
|
||||||
xPOR (regX, xmm7);
|
xPOR (regX, xmm7);
|
||||||
xMOVAPS(ptr[dstIndirect], regX);
|
xMOVAPS(ptr[dstIndirect], regX);
|
||||||
}
|
}
|
||||||
|
@ -362,6 +364,7 @@ void VifUnpackSSE_Init()
|
||||||
{
|
{
|
||||||
DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");
|
DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");
|
||||||
|
|
||||||
|
xSetTextPtr(nullptr);
|
||||||
xSetPtr(SysMemory::GetVIFUnpackRec());
|
xSetPtr(SysMemory::GetVIFUnpackRec());
|
||||||
|
|
||||||
for (int a = 0; a < 2; a++)
|
for (int a = 0; a < 2; a++)
|
||||||
|
|
|
@ -98,6 +98,7 @@ public:
|
||||||
bool inputMasked;
|
bool inputMasked;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
xAddressReg vifPtr;
|
||||||
const nVifStruct& v; // vif0 or vif1
|
const nVifStruct& v; // vif0 or vif1
|
||||||
const nVifBlock& vB; // some pre-collected data from VifStruct
|
const nVifBlock& vB; // some pre-collected data from VifStruct
|
||||||
int vCL; // internal copy of vif->cl
|
int vCL; // internal copy of vif->cl
|
||||||
|
|
Loading…
Reference in New Issue