mirror of https://github.com/PCSX2/pcsx2.git

EE:Rec: Allow rec memory anywhere

parent 8a9fbb43e6
commit fe2f97eeb5
@@ -49,6 +49,7 @@
 thread_local u8* x86Ptr;
+thread_local u8* xTextPtr;
 thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};

 namespace x86Emitter
@@ -295,13 +296,27 @@ const xRegister32
 void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset)
 {
 	sptr displacement = (sptr)address;
+	sptr textRelative = (sptr)address - (sptr)xTextPtr;
 	sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset);
+	// Can we use an 8-bit offset from the text pointer?
+	if (textRelative == (s8)textRelative && xTextPtr)
+	{
+		ModRM(1, regfield, RTEXTPTR.GetId());
+		xWrite<s8>((s8)textRelative);
+		return;
+	}
 	// Can we use a rip-relative address? (Prefer this over eiz because it's a byte shorter)
-	if (ripRelative == (s32)ripRelative)
+	else if (ripRelative == (s32)ripRelative)
 	{
 		ModRM(0, regfield, ModRm_UseDisp32);
 		displacement = ripRelative;
 	}
+	// How about from the text pointer?
+	else if (textRelative == (s32)textRelative && xTextPtr)
+	{
+		ModRM(2, regfield, RTEXTPTR.GetId());
+		displacement = textRelative;
+	}
 	else
 	{
 		pxAssertMsg(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register");
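The ladder above tries encodings from cheapest to most general. A compilable sketch of the same decision outside the emitter (the enum and function names are invented for illustration; s8/s32/sptr mirror the codebase's typedefs):

#include <cstdint>

using s8 = int8_t;
using s32 = int32_t;
using sptr = intptr_t;

enum class SibMode { TextDisp8, RipDisp32, TextDisp32, NeedIndirectReg };

// Mirrors EmitSibMagic's preference order. textBase == 0 models "no text
// pointer pinned" (the VIF recompilers later in this diff pass nullptr).
SibMode chooseSibMode(sptr target, sptr endOfInstruction, sptr textBase)
{
	const sptr textRel = target - textBase;
	const sptr ripRel = target - endOfInstruction;
	if (textBase && textRel == (s8)textRel)
		return SibMode::TextDisp8; // ModRM + disp8 against RTEXTPTR
	if (ripRel == (s32)ripRel)
		return SibMode::RipDisp32; // ModRM + disp32, consumes no register
	if (textBase && textRel == (s32)textRel)
		return SibMode::TextDisp32; // ModRM + disp32 against RTEXTPTR
	return SibMode::NeedIndirectReg; // caller materializes the full 64-bit address
}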
@@ -539,6 +554,12 @@ const xRegister32
 	x86Ptr = (u8*)ptr;
 }

+// Assigns the current emitter text base address.
+__emitinline void xSetTextPtr(void* ptr)
+{
+	xTextPtr = (u8*)ptr;
+}
+
 // Retrieves the current emitter buffer target address.
 // This is provided instead of using x86Ptr directly, since we may in the future find
 // a need to change the storage class system for the x86Ptr 'under the hood.'
@@ -547,6 +568,12 @@ const xRegister32
 	return x86Ptr;
 }

+// Retrieves the current emitter text base address.
+__emitinline u8* xGetTextPtr()
+{
+	return xTextPtr;
+}
+
 __emitinline void xAlignPtr(uint bytes)
 {
 	// forward align
@@ -1229,6 +1256,9 @@ const xRegister32
 #endif

 	stackAlign(m_offset, true);
+
+	if (u8* ptr = xGetTextPtr())
+		xMOV64(RTEXTPTR, (sptr)ptr);
 }

 xScopedStackFrame::~xScopedStackFrame()
@@ -1285,12 +1315,15 @@ const xRegister32
 	{
 		return offset + base;
 	}
-	else
-	{
-		xLEA(tmpRegister, ptr[base]);
-		return offset + tmpRegister;
-	}
+	if (u8* ptr = xGetTextPtr())
+	{
+		sptr tbase = (sptr)base - (sptr)ptr;
+		if (tbase == (s32)tbase)
+			return offset + RTEXTPTR + tbase;
+	}
+	xLEA(tmpRegister, ptr[base]);
+	return offset + tmpRegister;
 }

 void xLoadFarAddr(const xAddressReg& dst, void* addr)
 {
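xComplexAddress now has the same fallback shape: absolute disp32 if the address fits, then RTEXTPTR-relative, then an LEA into the caller's scratch register. A minimal compilable restatement (enum and names invented; the real function returns an address operand, not a plan):

#include <cstdint>

using s32 = int32_t;
using sptr = intptr_t;

enum class FarAddrPlan { AbsoluteDisp32, TextRelativeDisp32, LeaIntoTmpReg };

FarAddrPlan planFarAddress(sptr base, sptr textBase)
{
	if (base == (s32)base)
		return FarAddrPlan::AbsoluteDisp32; // the address itself is a valid disp32
	const sptr tbase = base - textBase;
	if (textBase && tbase == (s32)tbase)
		return FarAddrPlan::TextRelativeDisp32; // encode as RTEXTPTR + tbase
	return FarAddrPlan::LeaIntoTmpReg; // worst case: spend tmpRegister on the address
}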
@@ -149,11 +149,13 @@ namespace x86Emitter
 	static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field)

 	extern void xSetPtr(void* ptr);
+	extern void xSetTextPtr(void* ptr);
 	extern void xAlignPtr(uint bytes);
 	extern void xAdvancePtr(uint bytes);
 	extern void xAlignCallTarget();

 	extern u8* xGetPtr();
+	extern u8* xGetTextPtr();
 	extern u8* xGetAlignedCallTarget();

 	extern JccComparisonType xInvertCond(JccComparisonType src);
@@ -646,6 +648,8 @@ extern const xRegister32
 	calleeSavedReg1d,
 	calleeSavedReg2d;

+/// Holds a pointer to program text at all times so we don't need to be within 2GB of text
+static constexpr const xAddressReg& RTEXTPTR = rbx;
+
 // clang-format on
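A note on the register choice: rbx is callee-saved in both the SysV and Windows x64 ABIs, so the pinned base survives calls into C++ helpers without being re-materialized. Rough per-access operand sizes (editor's estimates for "mov eax, [target]", assuming no extra REX prefixes) show what the pin buys:

// Approximate encoded bytes for "mov eax, [target]" per strategy:
constexpr int kViaScratchReg = 10 + 2; // movabs r64, imm64 + mov eax, [r64]
constexpr int kRipDisp32 = 6;          // mov eax, [rip + disp32]; target must be within +/-2GB of code
constexpr int kTextDisp32 = 6;         // mov eax, [rbx + disp32]; works at any distance from code
constexpr int kTextDisp8 = 3;          // mov eax, [rbx + disp8]; the new fast path in EmitSibMagic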
@@ -890,10 +890,13 @@ static void recReserve()
 		pxFailRel("Failed to allocate R3000 InstCache array.");
 }

+#define R3000A_TEXTPTR (&psxRegs.GPR.r[33])
+
 void recResetIOP()
 {
 	DevCon.WriteLn("iR3000A Recompiler reset.");

+	xSetTextPtr(R3000A_TEXTPTR);
 	xSetPtr(SysMemory::GetIOPRec());
 	_DynGen_Dispatchers();
 	recPtr = xGetPtr();
@@ -1565,6 +1568,7 @@ static void iopRecRecompile(const u32 startpc)
 		recResetIOP();
 	}

+	xSetTextPtr(R3000A_TEXTPTR);
 	xSetPtr(recPtr);
 	recPtr = xGetAlignedCallTarget();
@@ -21,6 +21,11 @@ extern u32 target; // branch target
 extern u32 s_nBlockCycles; // cycles of current block recompiling
 extern bool s_nBlockInterlocked; // Current block has VU0 interlocking

+// x86 can use shorter displacement if it fits in an s8, so offset 144 bytes into the cpuRegs
+// This will allow us to reach r1-r16 with a shorter encoding
+// TODO: Actually figure out what things are used most often, maybe rearrange the cpuRegs struct, and point at that
+#define R5900_TEXTPTR (&cpuRegs.GPR.r[9])
+
 //////////////////////////////////////////////////////////////////////////////////////////
 //
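The comment's arithmetic checks out; a standalone program that reproduces it (assuming only that each GPR slot is 16 bytes wide):

#include <cstdio>

int main()
{
	const int base = 9 * 16; // R5900_TEXTPTR's byte offset into the GPR file: 144
	for (int i = 0; i < 32; i++)
	{
		const int disp = i * 16 - base;
		if (disp >= -128 && disp <= 127) // signed 8-bit displacement window
			std::printf("r%d reachable at [RTEXTPTR + %d]\n", i, disp);
	}
	return 0; // prints exactly r1 through r16
}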
@@ -445,6 +445,8 @@ static const void* _DynGen_EnterRecompiledCode()
 	xSUB(rsp, stack_size);
 #endif

+	if (u8* ptr = xGetTextPtr())
+		xMOV64(RTEXTPTR, (sptr)ptr);
 	if (CHECK_FASTMEM)
 		xMOV(RFASTMEMBASE, ptrNative[&vtlb_private::vtlbdata.fastmem_base]);
@@ -585,6 +587,7 @@ static void recResetRaw()
 	EE::Profiler.Reset();

+	xSetTextPtr(R5900_TEXTPTR);
 	xSetPtr(SysMemory::GetEERec());
 	_DynGen_Dispatchers();
 	vtlb_DynGenDispatchers();
@@ -897,6 +900,7 @@ u8* recBeginThunk()
 	if (recPtr >= recPtrEnd)
 		eeRecNeedsReset = true;

+	xSetTextPtr(R5900_TEXTPTR);
 	xSetPtr(recPtr);
 	recPtr = xGetAlignedCallTarget();
@@ -2191,6 +2195,7 @@ static void recRecompile(const u32 startpc)
 		recResetRaw();
 	}

+	xSetTextPtr(R5900_TEXTPTR);
 	xSetPtr(recPtr);
 	recPtr = xGetAlignedCallTarget();
@@ -345,6 +345,7 @@ void vtlb_DynGenDispatchers()
 	for (int sign = 0; sign < (!mode && bits < 3 ? 2 : 1); sign++)
 	{
 		xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign));
+		xSetTextPtr(R5900_TEXTPTR);

 		DynGen_IndirectTlbDispatcher(mode, bits, !!sign);
 	}
@@ -42,6 +42,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
 		VU0.VI[REG_VPU_STAT].UL &= ~0x100;
 	}

+	xSetTextPtr(mVU.textPtr());
 	xSetPtr(mVU.cache);
 	mVUdispatcherAB(mVU);
 	mVUdispatcherCD(mVU);
@@ -123,6 +123,7 @@ struct microVU
 	s32 cycles; // Cycles Counter

 	VURegs& regs() const { return ::vuRegs[index]; }
+	void* textPtr() const { return (index && THREAD_VU1) ? (void*)&regs().VF[9] : (void*)R5900_TEXTPTR; }

 	__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
 	__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
@@ -207,15 +207,17 @@ static void mVUGenerateCopyPipelineState(mV)
 {
 	mVU.copyPLState = xGetAlignedCallTarget();

+	xLoadFarAddr(rdx, reinterpret_cast<u8*>(&mVU.prog.lpState));
+
 	if (cpuinfo_has_x86_avx())
 	{
 		xVMOVAPS(ymm0, ptr[rax]);
 		xVMOVAPS(ymm1, ptr[rax + 32u]);
 		xVMOVAPS(ymm2, ptr[rax + 64u]);

-		xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], ymm0);
-		xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], ymm1);
-		xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], ymm2);
+		xVMOVUPS(ptr[rdx], ymm0);
+		xVMOVUPS(ptr[rdx + 32u], ymm1);
+		xVMOVUPS(ptr[rdx + 64u], ymm2);

 		xVZEROUPPER();
 	}
@@ -228,12 +230,12 @@ static void mVUGenerateCopyPipelineState(mV)
 		xMOVAPS(xmm4, ptr[rax + 64u]);
 		xMOVAPS(xmm5, ptr[rax + 80u]);

-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], xmm0);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 16u], xmm1);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], xmm2);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 48u], xmm3);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], xmm4);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 80u], xmm5);
+		xMOVUPS(ptr[rdx], xmm0);
+		xMOVUPS(ptr[rdx + 16u], xmm1);
+		xMOVUPS(ptr[rdx + 32u], xmm2);
+		xMOVUPS(ptr[rdx + 48u], xmm3);
+		xMOVUPS(ptr[rdx + 64u], xmm4);
+		xMOVUPS(ptr[rdx + 80u], xmm5);
 	}

 	xRET();
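Why hoist the address into rdx: the old ptr[&mVU.prog.lpState] operands could only encode while lpState sat within ±2GB of the code (rip-relative disp32), a guarantee this commit removes. Rebasing also happens to be slightly smaller; a back-of-the-envelope count under the editor's assumptions (rip-relative SSE store ≈ 7 bytes, [rdx + disp8] store ≈ 5, movabs = 10):

constexpr int kBeforeBytes = 6 * 7;     // six rip-relative stores, only encodable within +/-2GB
constexpr int kAfterBytes = 10 + 6 * 5; // one movabs rdx, imm64 plus six [rdx + disp8] stores
static_assert(kAfterBytes < kBeforeBytes, "the rebased form is also more compact");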
@@ -326,6 +328,7 @@ _mVUt void* mVUexecute(u32 startPC, u32 cycles)
 	mVU.cycles = cycles;
 	mVU.totalCycles = cycles;

+	xSetTextPtr(mVU.textPtr());
 	xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off
 	return mVUsearchProg<vuIndex>(startPC & vuLimit, (uptr)&mVU.prog.lpState); // Find and set correct program
 }
@@ -411,6 +411,7 @@ public:
 		}
 	}

+	gprMap[RTEXTPTR.GetId()].usable = !xGetTextPtr();
 	gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
 }
@@ -1106,7 +1106,7 @@ mVUop(mVU_ILW)
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}

 	const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
@@ -1133,7 +1133,7 @@ mVUop(mVU_ILWR)
 	if (_Is_)
 	{
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
-		mVUaddrFix (mVU, gprT1q);
+		mVUaddrFix (mVU, gprT1q, gprT2q);

 		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
 		xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
@@ -1170,7 +1170,7 @@ mVUop(mVU_ISW)
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}

 	// If regT is dirty, the high bits might not be zero.
@@ -1201,7 +1201,7 @@ mVUop(mVU_ISWR)
 	if (_Is_)
 	{
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		is = gprT1q;
 	}
 	const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
@@ -1257,7 +1257,7 @@ mVUop(mVU_LQ)
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}

 	const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
@@ -1281,7 +1281,7 @@ mVUop(mVU_LQD)
 		xDEC(regS);
 		xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
 		mVU.regAlloc->clearNeeded(regS);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		is = gprT1q;
 	}
 	else
@@ -1319,7 +1319,7 @@ mVUop(mVU_LQI)
 		xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
 		xINC(regS);
 		mVU.regAlloc->clearNeeded(regS);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		is = gprT1q;
 	}
 	if (!mVUlow.noWriteVF)
@@ -1351,7 +1351,7 @@ mVUop(mVU_SQ)
 		mVU.regAlloc->moveVIToGPR(gprT1, _It_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}

 	const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
@@ -1375,7 +1375,7 @@ mVUop(mVU_SQD)
 		xDEC(regT);
 		xMOVZX(gprT1, xRegister16(regT));
 		mVU.regAlloc->clearNeeded(regT);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		it = gprT1q;
 	}
 	else
@@ -1405,7 +1405,7 @@ mVUop(mVU_SQI)
 		xMOVZX(gprT1, xRegister16(regT));
 		xINC(regT);
 		mVU.regAlloc->clearNeeded(regT);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}
 	const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
 	if (_It_)
@@ -295,7 +295,7 @@ static void mVUwaitMTVU()
 }

 // Transforms the Address in gprReg to valid VU0/VU1 Address
-__fi void mVUaddrFix(mV, const xAddressReg& gprReg)
+__fi void mVUaddrFix(mV, const xAddressReg& gprReg, const xAddressReg& tmpReg)
 {
 	if (isVU1)
 	{
@@ -324,7 +324,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
 			xFastCall((void*)mVU.waitMTVU);
 		}
 		xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
-		xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
+		sptr offset = (u128*)VU1.VF - (u128*)VU0.Mem;
+		if (offset == (s32)offset)
+		{
+			xADD(gprReg, offset);
+		}
+		else
+		{
+			xMOV64(tmpReg, offset);
+			xADD(gprReg, tmpReg);
+		}
 		jmpB.SetTarget();
 		xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
 	}
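The split above exists because x86-64 has no "add r64, imm64": the immediate form sign-extends a 32-bit value, so a VU1.VF-to-VU0.Mem delta wider than that must detour through a scratch register. A compilable restatement of the decision (names invented):

#include <cstdint>

using s32 = int32_t;
using sptr = intptr_t;

enum class AddConstPlan { AddImm32, Mov64ThenAdd };

AddConstPlan classifyAddConstant(sptr offset)
{
	// Fits in a sign-extended imm32: a single ADD. Otherwise: MOV64 into tmpReg, then ADD.
	return offset == (s32)offset ? AddConstPlan::AddImm32 : AddConstPlan::Mov64ThenAdd;
}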
@@ -23,7 +23,8 @@ void dVifRelease(int idx)
 }

 VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
-	: v(vif_)
+	: vifPtr(rax)
+	, v(vif_)
 	, vB(vifBlock_)
 {
 	const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2)
@@ -42,9 +43,6 @@ __fi void makeMergeMask(u32& x)
 __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
 {
-	const int idx = v.idx;
-	const vifStruct& vif = MTVU_VifX;
-
 	//This could have ended up copying the row when there was no row to write.1810080
 	u32 m0 = vB.mask; //The actual mask example 0x03020100
 	u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
@@ -52,14 +50,14 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
 	if ((doMask && m2) || doMode)
 	{
-		xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
+		xMOVAPS(xmmRow, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)]);
 		MSKPATH3_LOG("Moving row");
 	}

 	if (doMask && m3)
 	{
 		VIF_LOG("Merging Cols");
-		xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
+		xMOVAPS(xmmCol0, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskCol)]);
 		if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
 		if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
 		if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
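With the struct's address pinned in vifPtr, each mask access becomes a register plus a small compile-time constant and no longer cares where MTVU allocates the vifStruct. The offsetof arithmetic in isolation (DemoVifStruct is invented; the real vifStruct has many more fields):

#include <cstddef>
#include <cstdint>

struct DemoVifStruct
{
	uint32_t MaskRow[4];
	uint32_t MaskCol[4];
};

// Both displacements fit in a disp8, so each xMOVAPS operand stays short.
static_assert(offsetof(DemoVifStruct, MaskRow) == 0, "row at [vifPtr + 0]");
static_assert(offsetof(DemoVifStruct, MaskCol) == 16, "cols at [vifPtr + 16]");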
@@ -137,8 +135,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
 void VifUnpackSSE_Dynarec::writeBackRow() const
 {
-	const int idx = v.idx;
-	xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
+	xMOVAPS(ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)], xmmRow);

 	VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
 }
@@ -239,6 +236,7 @@ void VifUnpackSSE_Dynarec::ProcessMasks()
 void VifUnpackSSE_Dynarec::CompileRoutine()
 {
+	const int idx = v.idx;
 	const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
 	const int upkNum = vB.upkType & 0xf;
 	const u8& vift = nVifT[upkNum];
@@ -252,6 +250,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
 	VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);

 	pxAssume(vCL == 0);
+	xLoadFarAddr(vifPtr, &MTVU_VifX);

 	// Value passed determines # of col regs we need to load
 	SetMasks(isFill ? blockSize : cycleSize);
@@ -336,6 +335,7 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill)
 	}

 	// Compile the block now
+	xSetTextPtr(nullptr);
 	xSetPtr(v.recWritePtr);

 	block.startPtr = (uptr)xGetAlignedCallTarget();
@@ -329,9 +329,11 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const
 {
 	xMOVAPS(xmm7, ptr[dstIndirect]);
 	int offX = std::min(curCycle, 3);
-	xPAND(regX, ptr32[nVifMask[0][offX]]);
-	xPAND(xmm7, ptr32[nVifMask[1][offX]]);
-	xPOR (regX, ptr32[nVifMask[2][offX]]);
+	sptr base = reinterpret_cast<sptr>(nVifMask);
+	xLoadFarAddr(rax, nVifMask);
+	xPAND(regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[0][offX]) - base)]);
+	xPAND(xmm7, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[1][offX]) - base)]);
+	xPOR (regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[2][offX]) - base)]);
 	xPOR (regX, xmm7);
 	xMOVAPS(ptr[dstIndirect], regX);
 }
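(Note: the extracted page showed the base taken from nVifMask[2] while rax is loaded with nVifMask; the body above uses nVifMask for both so the displacements are consistent.) The same rebasing idea in isolation: load the table's address once, then express every entry as that base register plus the entry's distance from the table start, a small constant regardless of where the table ended up in memory. A runnable sketch (table shape borrowed from nVifMask; it only prints displacements):

#include <cstdint>
#include <cstdio>

static uint32_t maskTable[3][4][4]; // same shape as nVifMask

int main()
{
	const intptr_t base = reinterpret_cast<intptr_t>(maskTable);
	for (int plane = 0; plane < 3; plane++)
	{
		const intptr_t disp = reinterpret_cast<intptr_t>(maskTable[plane][3]) - base;
		std::printf("plane %d, column 3 -> [reg + %lld]\n", plane, static_cast<long long>(disp));
	}
	return 0;
}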
@@ -362,6 +364,7 @@ void VifUnpackSSE_Init()
 {
 	DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");

+	xSetTextPtr(nullptr);
 	xSetPtr(SysMemory::GetVIFUnpackRec());

 	for (int a = 0; a < 2; a++)
@@ -98,6 +98,7 @@ public:
 	bool inputMasked;

 protected:
+	xAddressReg vifPtr;
 	const nVifStruct& v; // vif0 or vif1
 	const nVifBlock& vB; // some pre-collected data from VifStruct
 	int vCL; // internal copy of vif->cl