mVU: Clean up range function and improve merging. Slim down cmpProg.

This commit is contained in:
refractionpcsx2 2022-11-13 19:03:34 +00:00
parent 1146175648
commit be438587c7
3 changed files with 64 additions and 45 deletions

View File

@ -191,7 +191,8 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC)
prog->idx = mVU.prog.total++;
prog->ranges = new std::deque<microRange>();
prog->startPC = startPC;
mVUcacheProg(mVU, *prog); // Cache Micro Program
if(doWholeProgCompare)
mVUcacheProg(mVU, *prog); // Cache Micro Program
double cacheSize = (double)((uptr)mVU.prog.x86end - (uptr)mVU.prog.x86start);
double cacheUsed = ((double)((uptr)mVU.prog.x86ptr - (uptr)mVU.prog.x86start)) / (double)_1mb;
double cachePerc = ((double)((uptr)mVU.prog.x86ptr - (uptr)mVU.prog.x86start)) / cacheSize * 100;
@ -204,10 +205,18 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC)
// Caches Micro Program
// Copies bytes from mVU.regs().Micro into prog.data, then calls mVUdumpProg(mVU, prog).
// NOTE(review): this listing appears to be a diff whose +/- markers were stripped, so
// pre-change and post-change lines are shown together in this body. Presumably the first
// `if (!mVU.index)` copy and the bare `else` copy of 0x4000 bytes are the removed lines --
// confirm against the actual source file before relying on this text.
__ri void mVUcacheProg(microVU& mVU, microProgram& prog)
{
if (!mVU.index)
memcpy(prog.data, mVU.regs().Micro, 0x1000);
if (!doWholeProgCompare)
{
// Copy only the bytes from mVUrange.start up to (not including) mVUrange.end,
// using the same byte offset into both the cached copy and the micro memory.
auto cmpOffset = [&](void* x) { return (u8*)x + mVUrange.start; };
memcpy(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (mVUrange.end - mVUrange.start));
}
else
memcpy(prog.data, mVU.regs().Micro, 0x4000);
{
// Full copy from offset 0: 0x1000 bytes when mVU.index is 0, 0x4000 bytes otherwise.
// NOTE(review): presumably this braced block is the body of the `else` above in the
// post-change version -- confirm.
if (!mVU.index)
memcpy(prog.data, mVU.regs().Micro, 0x1000);
else
memcpy(prog.data, mVU.regs().Micro, 0x4000);
}
mVUdumpProg(mVU, prog);
}
@ -260,9 +269,9 @@ void mVUprintUniqueRatio(microVU& mVU)
}
// Compare Cached microProgram to mVU.regs().Micro
__fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg)
__fi bool mVUcmpProg(microVU& mVU, microProgram& prog)
{
if (cmpWholeProg)
if (doWholeProgCompare)
{
if (memcmp((u8*)prog.data, mVU.regs().Micro, mVU.microMemSize))
return false;
@ -271,16 +280,19 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg)
{
for (const auto& range : *prog.ranges)
{
auto cmpOffset = [&](void* x) { return (u8*)x + range.start; };
#if defined(PCSX2_DEVBUILD) || defined(_DEBUG)
if ((range.start < 0) || (range.end < 0))
DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end);
#endif
auto cmpOffset = [&](void* x) { return (u8*)x + range.start; };
if (memcmp(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start)))
return false;
}
}
mVU.prog.cleared = 0;
mVU.prog.cur = &prog;
mVU.prog.isSame = cmpWholeProg ? 1 : -1;
mVU.prog.isSame = doWholeProgCompare ? 1 : -1;
return true;
}
@ -296,7 +308,7 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState)
std::deque<microProgram*>::iterator it(list->begin());
for (; it != list->end(); ++it)
{
bool b = mVUcmpProg(mVU, *it[0], 0);
bool b = mVUcmpProg(mVU, *it[0]);
if (b)
{

View File

@ -54,12 +54,15 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC)
pxFailDev("microVU: PC out of VU memory");
}
// The PC handling pre-wraps the PC, so when it has wrapped to 0 we need to set the end PC to the end of the micro memory, but only in that case and no further.
const s32 cur_pc = (!isStartPC && mVUrange.start > pc && pc == 0) ? mVU.microMemSize : pc;
if (isStartPC) // Check if startPC is already within a block we've recompiled
{
std::deque<microRange>::const_iterator it(ranges->begin());
for (; it != ranges->end(); ++it)
{
if ((pc >= it[0].start) && (pc <= it[0].end))
if ((cur_pc >= it[0].start) && (cur_pc <= it[0].end))
{
if (it[0].start != it[0].end)
{
@ -71,53 +74,51 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC)
}
}
}
else if (mVUrange.end >= pc)
else if (mVUrange.end >= cur_pc)
{
// existing range covers more area than current PC so no need to process it
return;
}
mVUcheckIsSame(mVU);
if (doWholeProgCompare)
mVUcheckIsSame(mVU);
if (isStartPC)
{
microRange mRange = {pc, -1};
microRange mRange = {cur_pc, -1};
ranges->push_front(mRange);
return;
}
if (mVUrange.start <= pc)
if (mVUrange.start <= cur_pc)
{
mVUrange.end = pc;
bool mergedRange = false;
s32 rStart = mVUrange.start;
s32 rEnd = mVUrange.end;
mVUrange.end = cur_pc;
s32& rStart = mVUrange.start;
s32& rEnd = mVUrange.end;
std::deque<microRange>::iterator it(ranges->begin());
for (++it; it != ranges->end(); ++it)
it++;
for (;it != ranges->end();)
{
if ((it[0].start >= rStart) && (it[0].start <= rEnd)) // Starts after this prog but starts before the end of current prog
if (((it->start >= rStart) && (it->start <= rEnd)) || ((it->end >= rStart) && (it->end <= rEnd))) // Starts after this prog but starts before the end of current prog
{
it[0].start = std::min(it[0].start, rStart); // Choose the earlier start
mergedRange = true;
rStart = std::min(it->start, rStart); // Choose the earlier start
rEnd = std::max(it->end, rEnd);
it = ranges->erase(it);
}
// Make sure we check both as the start on the other one may be later, we don't want to delete that
if ((it[0].end >= rStart) && (it[0].end <= rEnd)) // Ends after this prog starts but ends before this one ends
{
it[0].end = std::max(it[0].end, rEnd); // Extend the end of this prog to match this program
mergedRange = true;
}
}
if (mergedRange)
{
ranges->erase(ranges->begin());
else
it++;
}
}
else
{
mVUrange.end = mVU.microMemSize;
DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%d]", mVU.index, mVUrange.start, mVUrange.end);
microRange mRange = {0, pc};
DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%04x] PC %x", mVU.index, mVUrange.start, mVUrange.end, cur_pc);
microRange mRange = {0, cur_pc };
ranges->push_front(mRange);
}
if(!doWholeProgCompare)
mVUcacheProg(mVU, *mVU.prog.cur);
}
//------------------------------------------------------------------

View File

@ -292,14 +292,14 @@ typedef u32 (*mVUCall)(void*, void*);
//------------------------------------------------------------------
// Reg Alloc
static const bool doRegAlloc = true; // Set to false to flush every 32bit Instruction
static constexpr bool doRegAlloc = true; // Set to false to flush every 32bit Instruction
// This turns off reg alloc for the most part, but reg alloc will still
// be done within instructions... Also on doSwapOp() regAlloc is needed between
// Lower and Upper instructions, so in this case it flushes after the full
// 64bit instruction (lower and upper)
// No Flag Optimizations
static const bool noFlagOpts = false; // Set to true to disable all flag setting optimizations
static constexpr bool noFlagOpts = false; // Set to true to disable all flag setting optimizations
// Note: The flag optimizations this disables should all be harmless, so
// this option is mainly just for debugging... it effectively forces mVU
// to always update Mac and Status Flags (both sticky and non-sticky) whenever
@ -307,9 +307,9 @@ static const bool noFlagOpts = false; // Set to true to disable all flag setting
// flag instances between blocks...
// Multiple Flag Instances
static const bool doSFlagInsts = true; // Set to true to enable multiple status flag instances
static const bool doMFlagInsts = true; // Set to true to enable multiple mac flag instances
static const bool doCFlagInsts = true; // Set to true to enable multiple clip flag instances
static constexpr bool doSFlagInsts = true; // Set to true to enable multiple status flag instances
static constexpr bool doMFlagInsts = true; // Set to true to enable multiple mac flag instances
static constexpr bool doCFlagInsts = true; // Set to true to enable multiple clip flag instances
// This is the correct behavior of the VUs. Due to the pipeline of the VUs
// there can be up to 4 different instances of values to keep track of
// for the 3 different types of flags: Status, Mac, Clip flags.
@ -317,27 +317,27 @@ static const bool doCFlagInsts = true; // Set to true to enable multiple clip
// corresponding flag, which may be useful when debugging flag pipeline bugs.
// Branch in Branch Delay Slots
static const bool doBranchInDelaySlot = true; // Set to true to enable evil-branches
static constexpr bool doBranchInDelaySlot = true; // Set to true to enable evil-branches
// This attempts to emulate the correct behavior for branches in branch delay
// slots. It is evil that games do this, and handling the different possible
// cases is tricky and bug prone. If this option is disabled then the second
// branch is treated as a NOP and effectively ignored.
// Constant Propagation
static const bool doConstProp = false; // Set to true to turn on vi15 const propagation
static constexpr bool doConstProp = false; // Set to true to turn on vi15 const propagation
// Enables Constant Propagation for Jumps based on vi15 'link-register'
// allowing us to know many indirect jump target addresses.
// Makes GoW a lot slower due to extra recompilation time and extra code-gen!
// Indirect Jump Caching
static const bool doJumpCaching = true; // Set to true to enable jump caching
static constexpr bool doJumpCaching = true; // Set to true to enable jump caching
// Indirect jumps (JR/JALR) will remember the entry points to their previously
// jumped-to addresses. This allows us to skip the microBlockManager::search()
// routine that is performed every indirect jump in order to find a block within a
// program that matches the correct pipeline state.
// Indirect Jumps are part of same cached microProgram
static const bool doJumpAsSameProgram = false; // Set to true to treat jumps as same program
static constexpr bool doJumpAsSameProgram = false; // Set to true to treat jumps as same program
// Enabling this treats indirect jumps (JR/JALR) as part of the same microProgram
// when determining the valid ranges for the microProgram cache. Disabling this
// counts indirect jumps as separate cached microPrograms which generally leads
@ -347,11 +347,17 @@ static const bool doJumpAsSameProgram = false; // Set to true to treat jumps as
// Note: You MUST disable doJumpCaching if you enable this option.
// Handling of D-Bit in Micro Programs
static const bool doDBitHandling = false;
static constexpr bool doDBitHandling = false;
// This flag shouldn't be enabled in released versions of games. Any games which
// need this method of pausing the VU should be using the T-Bit instead, however
// this could prove useful for VU debugging.
// Whole program comparison on search
static constexpr bool doWholeProgCompare = false;
// This shouldn't be needed and could inflate program generation.
// Compares the entire VU memory with the stored micro program's memory, regardless of whether it's used.
// Generally slower but may be useful for debugging.
//------------------------------------------------------------------
// Speed Hacks (can cause infinite loops, SPS, Black Screens, etc...)
//------------------------------------------------------------------