mirror of https://github.com/PCSX2/pcsx2.git
x86/microVU: Pack VF cycles into bitfields
The VF cycle count never goes above 4, so each component fits in a 4-bit field; across 32 registers that saves 64 bytes. Also removes blockhasmbit, which was never used, since save states are being invalidated anyway. [SAVEVERSION+] VU struct changes.
This commit is contained in:
parent
d3e527f2a4
commit
cd9b6c7ac3
|
@ -36,7 +36,7 @@ enum class FreezeAction
|
|||
// [SAVEVERSION+]
|
||||
// This informs the auto updater that the users savestates will be invalidated.
|
||||
|
||||
static const u32 g_SaveVersion = (0x9A35 << 16) | 0x0000;
|
||||
static const u32 g_SaveVersion = (0x9A36 << 16) | 0x0000;
|
||||
|
||||
|
||||
// the freezing data between submodules and core
|
||||
|
|
|
@ -157,7 +157,6 @@ struct alignas(16) VURegs
|
|||
u32 ebit;
|
||||
u32 pending_q;
|
||||
u32 pending_p;
|
||||
u32 blockhasmbit;
|
||||
|
||||
alignas(16) u32 micro_macflags[4];
|
||||
alignas(16) u32 micro_clipflags[4];
|
||||
|
|
|
@ -54,7 +54,6 @@ static void _vu0Exec(VURegs* VU)
|
|||
if (ptr[1] & 0x20000000 && VU == &VU0) // M flag
|
||||
{
|
||||
VU->flags |= VUFLAG_MFLAGSET;
|
||||
VU0.blockhasmbit = true;
|
||||
// Console.WriteLn("fixme: M flag set");
|
||||
}
|
||||
if (ptr[1] & 0x10000000) // D flag
|
||||
|
@ -185,8 +184,6 @@ static void _vu0Exec(VURegs* VU)
|
|||
{
|
||||
VU->VI[REG_TPC].UL = VU->branchpc;
|
||||
|
||||
VU->blockhasmbit = false;
|
||||
|
||||
if (VU->takedelaybranch)
|
||||
{
|
||||
DevCon.Warning("VU0 - Branch/Jump in Delay Slot");
|
||||
|
@ -205,8 +202,6 @@ static void _vu0Exec(VURegs* VU)
|
|||
_vuFlushAll(VU);
|
||||
VU0.VI[REG_VPU_STAT].UL &= ~0x1; /* E flag */
|
||||
vif0Regs.stat.VEW = false;
|
||||
|
||||
VU->blockhasmbit = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -109,7 +109,6 @@ void SaveStateBase::vuMicroFreeze()
|
|||
Freeze(VU0.ebit);
|
||||
Freeze(VU0.pending_q);
|
||||
Freeze(VU0.pending_p);
|
||||
Freeze(VU0.blockhasmbit);
|
||||
Freeze(VU0.micro_macflags);
|
||||
Freeze(VU0.micro_clipflags);
|
||||
Freeze(VU0.micro_statusflags);
|
||||
|
@ -149,7 +148,6 @@ void SaveStateBase::vuMicroFreeze()
|
|||
Freeze(VU1.ebit);
|
||||
Freeze(VU1.pending_q);
|
||||
Freeze(VU1.pending_p);
|
||||
Freeze(VU1.blockhasmbit);
|
||||
Freeze(VU1.micro_macflags);
|
||||
Freeze(VU1.micro_clipflags);
|
||||
Freeze(VU1.micro_statusflags);
|
||||
|
|
|
@ -164,7 +164,7 @@ public:
|
|||
{
|
||||
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4;
|
||||
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
|
||||
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg;
|
||||
// Checksum all four per-component cycle counts (x, y, z, w) of every VF register.
// The top byte must come from .w; using .x there would count x twice and ignore w.
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].x + (linkI->block.pState.VF[j].y << 8) + (linkI->block.pState.VF[j].z << 16) + (linkI->block.pState.VF[j].w << 24);
|
||||
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
|
||||
DevCon.WriteLn(Color_Green,
|
||||
"[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
|
||||
|
|
|
@ -314,9 +314,9 @@ __ri void eBitWarning(mV)
|
|||
//------------------------------------------------------------------
|
||||
// Cycles / Pipeline State / Early Exit from Execution
|
||||
//------------------------------------------------------------------
|
||||
__fi void optimizeReg(u8& rState) { rState = (rState == 1) ? 0 : rState; }
|
||||
__fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); }
|
||||
__fi void tCycles(u8& dest, u8& src) { dest = std::max(dest, src); }
|
||||
// Returns 0 when rState is exactly 1; every other value is passed through unchanged.
__fi u8 optimizeReg(u8 rState)
{
	if (rState == 1)
		return 0;
	return rState;
}
|
||||
// Returns reg reduced by x, clamped at 0 instead of wrapping when x >= reg.
__fi u8 calcCycles(u8 reg, u8 x)
{
	if (reg <= x)
		return 0;
	return static_cast<u8>(reg - x);
}
|
||||
// Returns the larger of dest and src (dest when they are equal).
__fi u8 tCycles(u8 dest, u8 src)
{
	return (dest < src) ? src : dest;
}
|
||||
// Flips the low bit of mVU.p.
__fi void incP(mV)
{
	mVU.p = mVU.p ^ 1;
}
|
||||
// Flips the low bit of mVU.q.
__fi void incQ(mV)
{
	mVU.q = mVU.q ^ 1;
}
|
||||
|
||||
|
@ -328,17 +328,17 @@ void mVUoptimizePipeState(mV)
|
|||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
{
|
||||
optimizeReg(mVUregs.VF[i].x);
|
||||
optimizeReg(mVUregs.VF[i].y);
|
||||
optimizeReg(mVUregs.VF[i].z);
|
||||
optimizeReg(mVUregs.VF[i].w);
|
||||
mVUregs.VF[i].x = optimizeReg(mVUregs.VF[i].x);
|
||||
mVUregs.VF[i].y = optimizeReg(mVUregs.VF[i].y);
|
||||
mVUregs.VF[i].z = optimizeReg(mVUregs.VF[i].z);
|
||||
mVUregs.VF[i].w = optimizeReg(mVUregs.VF[i].w);
|
||||
}
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
optimizeReg(mVUregs.VI[i]);
|
||||
mVUregs.VI[i] = optimizeReg(mVUregs.VI[i]);
|
||||
}
|
||||
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } }
|
||||
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(mVU); } }
|
||||
if (mVUregs.q) { mVUregs.q = optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } }
|
||||
if (mVUregs.p) { mVUregs.p = optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(mVU); } }
|
||||
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
|
||||
}
|
||||
|
||||
|
@ -348,21 +348,21 @@ void mVUincCycles(mV, int x)
|
|||
// VF[0] is a constant value (0.0 0.0 0.0 1.0)
|
||||
for (int z = 31; z > 0; z--)
|
||||
{
|
||||
calcCycles(mVUregs.VF[z].x, x);
|
||||
calcCycles(mVUregs.VF[z].y, x);
|
||||
calcCycles(mVUregs.VF[z].z, x);
|
||||
calcCycles(mVUregs.VF[z].w, x);
|
||||
mVUregs.VF[z].x = calcCycles(mVUregs.VF[z].x, x);
|
||||
mVUregs.VF[z].y = calcCycles(mVUregs.VF[z].y, x);
|
||||
mVUregs.VF[z].z = calcCycles(mVUregs.VF[z].z, x);
|
||||
mVUregs.VF[z].w = calcCycles(mVUregs.VF[z].w, x);
|
||||
}
|
||||
// VI[0] is a constant value (0)
|
||||
for (int z = 15; z > 0; z--)
|
||||
{
|
||||
calcCycles(mVUregs.VI[z], x);
|
||||
mVUregs.VI[z] = calcCycles(mVUregs.VI[z], x);
|
||||
}
|
||||
if (mVUregs.q)
|
||||
{
|
||||
if (mVUregs.q > 4)
|
||||
{
|
||||
calcCycles(mVUregs.q, x);
|
||||
mVUregs.q = calcCycles(mVUregs.q, x);
|
||||
if (mVUregs.q <= 4)
|
||||
{
|
||||
mVUinfo.doDivFlag = 1;
|
||||
|
@ -370,27 +370,27 @@ void mVUincCycles(mV, int x)
|
|||
}
|
||||
else
|
||||
{
|
||||
calcCycles(mVUregs.q, x);
|
||||
mVUregs.q = calcCycles(mVUregs.q, x);
|
||||
}
|
||||
if (!mVUregs.q)
|
||||
incQ(mVU);
|
||||
}
|
||||
if (mVUregs.p)
|
||||
{
|
||||
calcCycles(mVUregs.p, x);
|
||||
mVUregs.p = calcCycles(mVUregs.p, x);
|
||||
if (!mVUregs.p || mVUregsTemp.p)
|
||||
incP(mVU);
|
||||
}
|
||||
if (mVUregs.xgkick)
|
||||
{
|
||||
calcCycles(mVUregs.xgkick, x);
|
||||
mVUregs.xgkick = calcCycles(mVUregs.xgkick, x);
|
||||
if (!mVUregs.xgkick)
|
||||
{
|
||||
mVUinfo.doXGKICK = 1;
|
||||
mVUinfo.XGKICKPC = xPC;
|
||||
}
|
||||
}
|
||||
calcCycles(mVUregs.r, x);
|
||||
mVUregs.r = calcCycles(mVUregs.r, x);
|
||||
}
|
||||
|
||||
// Helps check if upper/lower ops read/write to same regs...
|
||||
|
@ -430,21 +430,21 @@ void mVUsetCycles(mV)
|
|||
cmpVFregs(mVUlow.VF_write, mVUup.VF_read[1], mVUinfo.backupVF);
|
||||
}
|
||||
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].x, mVUregsTemp.VF[0].x);
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].y, mVUregsTemp.VF[0].y);
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].z, mVUregsTemp.VF[0].z);
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].w, mVUregsTemp.VF[0].w);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[0]].x = tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].x, mVUregsTemp.VF[0].x);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[0]].y = tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].y, mVUregsTemp.VF[0].y);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[0]].z = tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].z, mVUregsTemp.VF[0].z);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[0]].w = tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].w, mVUregsTemp.VF[0].w);
|
||||
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].x, mVUregsTemp.VF[1].x);
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].y, mVUregsTemp.VF[1].y);
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].z, mVUregsTemp.VF[1].z);
|
||||
tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].w, mVUregsTemp.VF[1].w);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[1]].x = tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].x, mVUregsTemp.VF[1].x);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[1]].y = tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].y, mVUregsTemp.VF[1].y);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[1]].z = tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].z, mVUregsTemp.VF[1].z);
|
||||
mVUregs.VF[mVUregsTemp.VFreg[1]].w = tCycles(mVUregs.VF[mVUregsTemp.VFreg[1]].w, mVUregsTemp.VF[1].w);
|
||||
|
||||
tCycles(mVUregs.VI[mVUregsTemp.VIreg], mVUregsTemp.VI);
|
||||
tCycles(mVUregs.q, mVUregsTemp.q);
|
||||
tCycles(mVUregs.p, mVUregsTemp.p);
|
||||
tCycles(mVUregs.r, mVUregsTemp.r);
|
||||
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
|
||||
mVUregs.VI[mVUregsTemp.VIreg] = tCycles(mVUregs.VI[mVUregsTemp.VIreg], mVUregsTemp.VI);
|
||||
mVUregs.q = tCycles(mVUregs.q, mVUregsTemp.q);
|
||||
mVUregs.p = tCycles(mVUregs.p, mVUregsTemp.p);
|
||||
mVUregs.r = tCycles(mVUregs.r, mVUregsTemp.r);
|
||||
mVUregs.xgkick = tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
|
||||
}
|
||||
|
||||
// Prints Start/End PC of blocks executed, for debugging...
|
||||
|
@ -556,7 +556,6 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
|
|||
mVUregs.blockType = 0;
|
||||
mVUregs.viBackUp = 0;
|
||||
mVUregs.flagInfo = 0;
|
||||
mVUregs.mbitinblock = false;
|
||||
mVUsFlagHack = CHECK_VU_FLAGHACK;
|
||||
mVUinitConstValues(mVU);
|
||||
}
|
||||
|
@ -727,7 +726,6 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
|
|||
|
||||
if ((curI & _Mbit_) && isVU0)
|
||||
{
|
||||
mVUregs.mbitinblock = true;
|
||||
if (xPC > 0)
|
||||
{
|
||||
incPC(-2);
|
||||
|
@ -850,7 +848,6 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
|
|||
// Fix up vi15 const info for propagation through blocks
|
||||
mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
|
||||
mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0;
|
||||
xMOV(ptr32[&mVU.regs().blockhasmbit], mVUregs.mbitinblock);
|
||||
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
|
||||
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
|
||||
mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging...
|
||||
|
|
|
@ -232,14 +232,10 @@ void mvuGenerateCopyPipelineState(mV)
|
|||
xVMOVAPS(ymm0, ptr[rax]);
|
||||
xVMOVAPS(ymm1, ptr[rax + 32u]);
|
||||
xVMOVAPS(ymm2, ptr[rax + 64u]);
|
||||
xVMOVAPS(ymm3, ptr[rax + 96u]);
|
||||
xVMOVAPS(ymm4, ptr[rax + 128u]);
|
||||
|
||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], ymm0);
|
||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], ymm1);
|
||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], ymm2);
|
||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 96u], ymm3);
|
||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 128u], ymm4);
|
||||
|
||||
xVZEROUPPER();
|
||||
}
|
||||
|
@ -251,10 +247,6 @@ void mvuGenerateCopyPipelineState(mV)
|
|||
xMOVAPS(xmm3, ptr[rax + 48u]);
|
||||
xMOVAPS(xmm4, ptr[rax + 64u]);
|
||||
xMOVAPS(xmm5, ptr[rax + 80u]);
|
||||
xMOVAPS(xmm6, ptr[rax + 96u]);
|
||||
xMOVAPS(xmm7, ptr[rax + 112u]);
|
||||
xMOVAPS(xmm8, ptr[rax + 128u]);
|
||||
xMOVAPS(xmm9, ptr[rax + 144u]);
|
||||
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], xmm0);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 16u], xmm1);
|
||||
|
@ -262,10 +254,6 @@ void mvuGenerateCopyPipelineState(mV)
|
|||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 48u], xmm3);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], xmm4);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 80u], xmm5);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 96u], xmm6);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 112u], xmm7);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 128u], xmm8);
|
||||
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 144u], xmm9);
|
||||
}
|
||||
|
||||
xRET();
|
||||
|
|
|
@ -17,16 +17,12 @@
|
|||
#include "microVU.h"
|
||||
#include <array>
|
||||
|
||||
union regInfo
|
||||
struct regCycleInfo
|
||||
{
|
||||
u32 reg;
|
||||
struct
|
||||
{
|
||||
u8 x;
|
||||
u8 y;
|
||||
u8 z;
|
||||
u8 w;
|
||||
};
|
||||
u8 x : 4;
|
||||
u8 y : 4;
|
||||
u8 z : 4;
|
||||
u8 w : 4;
|
||||
};
|
||||
|
||||
// microRegInfo is carefully ordered for faster compares. The "important" information is
|
||||
|
@ -57,24 +53,24 @@ union alignas(16) microRegInfo
|
|||
};
|
||||
|
||||
u32 xgkickcycles;
|
||||
u8 mbitinblock;
|
||||
u8 unused;
|
||||
u8 vi15v; // 'vi15' constant is valid
|
||||
u16 vi15; // Constant Prop Info for vi15
|
||||
|
||||
struct
|
||||
{
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
regCycleInfo VF[32];
|
||||
};
|
||||
};
|
||||
|
||||
u128 full128[160 / sizeof(u128)];
|
||||
u64 full64[160 / sizeof(u64)];
|
||||
u32 full32[160 / sizeof(u32)];
|
||||
u128 full128[96 / sizeof(u128)];
|
||||
u64 full64[96 / sizeof(u64)];
|
||||
u32 full32[96 / sizeof(u32)];
|
||||
};
|
||||
|
||||
// Note: mVUcustomSearch needs to be updated if this is changed
|
||||
static_assert(sizeof(microRegInfo) == 160, "microRegInfo was not 160 bytes");
|
||||
static_assert(sizeof(microRegInfo) == 96, "microRegInfo was not 96 bytes");
|
||||
|
||||
struct microProgram;
|
||||
struct microJumpCache
|
||||
|
@ -94,14 +90,14 @@ struct alignas(16) microBlock
|
|||
|
||||
struct microTempRegInfo
|
||||
{
|
||||
regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
|
||||
u8 VFreg[2]; // Index of the VF reg
|
||||
u8 VI; // Holds cycle info for Id
|
||||
u8 VIreg; // Index of the VI reg
|
||||
u8 q; // Holds cycle info for Q reg
|
||||
u8 p; // Holds cycle info for P reg
|
||||
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
|
||||
u8 xgkick; // Holds the cycle info for XGkick
|
||||
regCycleInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction
|
||||
u8 VFreg[2]; // Index of the VF reg
|
||||
u8 VI; // Holds cycle info for Id
|
||||
u8 VIreg; // Index of the VI reg
|
||||
u8 q; // Holds cycle info for Q reg
|
||||
u8 p; // Holds cycle info for P reg
|
||||
u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified)
|
||||
u8 xgkick; // Holds the cycle info for XGkick
|
||||
};
|
||||
|
||||
struct microVFreg
|
||||
|
|
|
@ -644,22 +644,8 @@ void mVUcustomSearch()
|
|||
xMOVAPS (xmm2, ptr32[arg1reg + 0x50]);
|
||||
xPCMP.EQD(xmm2, ptr32[arg2reg + 0x50]);
|
||||
xPAND (xmm1, xmm2);
|
||||
xPAND (xmm0, xmm1);
|
||||
|
||||
xMOVAPS (xmm2, ptr32[arg1reg + 0x60]);
|
||||
xPCMP.EQD(xmm2, ptr32[arg2reg + 0x60]);
|
||||
xMOVAPS (xmm3, ptr32[arg1reg + 0x70]);
|
||||
xPCMP.EQD(xmm3, ptr32[arg2reg + 0x70]);
|
||||
xPAND (xmm2, xmm3);
|
||||
|
||||
xMOVAPS (xmm3, ptr32[arg1reg + 0x80]);
|
||||
xPCMP.EQD(xmm3, ptr32[arg2reg + 0x80]);
|
||||
xMOVAPS (xmm4, ptr32[arg1reg + 0x90]);
|
||||
xPCMP.EQD(xmm4, ptr32[arg2reg + 0x90]);
|
||||
xPAND (xmm3, xmm4);
|
||||
|
||||
xPAND (xmm0, xmm1);
|
||||
xPAND (xmm2, xmm3);
|
||||
xPAND (xmm0, xmm2);
|
||||
xMOVMSKPS(eax, xmm0);
|
||||
xXOR(eax, 0xf);
|
||||
|
||||
|
@ -675,20 +661,11 @@ void mVUcustomSearch()
|
|||
xForwardJNZ8 exitPoint;
|
||||
|
||||
xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
|
||||
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
|
||||
|
||||
xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
|
||||
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
|
||||
xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
|
||||
|
||||
xVMOVUPS(ymm2, ptr[arg1reg + 0x60]);
|
||||
xVPCMP.EQD(ymm2, ymm2, ptr[arg2reg + 0x60]);
|
||||
xVPAND(ymm0, ymm0, ymm1);
|
||||
|
||||
xVMOVUPS(ymm3, ptr[arg1reg + 0x80]);
|
||||
xVPCMP.EQD(ymm3, ymm3, ptr[arg2reg + 0x80]);
|
||||
xVPAND(ymm2, ymm2, ymm3);
|
||||
xVPAND(ymm0, ymm0, ymm2);
|
||||
|
||||
xVPMOVMSKB(eax, ymm0);
|
||||
xNOT(eax);
|
||||
|
||||
|
|
Loading…
Reference in New Issue