From be438587c7cbde04e457a2352ebcd83e355898eb Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sun, 13 Nov 2022 19:03:34 +0000 Subject: [PATCH] mVU: Clean up range function and improve merging. Slim down cmpProg. --- pcsx2/x86/microVU.cpp | 30 ++++++++++++++------ pcsx2/x86/microVU_Compile.inl | 53 ++++++++++++++++++----------------- pcsx2/x86/microVU_Misc.h | 26 ++++++++++------- 3 files changed, 64 insertions(+), 45 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index ac3a846594..c48e0325ab 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -191,7 +191,8 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC) prog->idx = mVU.prog.total++; prog->ranges = new std::deque(); prog->startPC = startPC; - mVUcacheProg(mVU, *prog); // Cache Micro Program + if(doWholeProgCompare) + mVUcacheProg(mVU, *prog); // Cache Micro Program double cacheSize = (double)((uptr)mVU.prog.x86end - (uptr)mVU.prog.x86start); double cacheUsed = ((double)((uptr)mVU.prog.x86ptr - (uptr)mVU.prog.x86start)) / (double)_1mb; double cachePerc = ((double)((uptr)mVU.prog.x86ptr - (uptr)mVU.prog.x86start)) / cacheSize * 100; @@ -204,10 +205,18 @@ __ri microProgram* mVUcreateProg(microVU& mVU, int startPC) // Caches Micro Program __ri void mVUcacheProg(microVU& mVU, microProgram& prog) { - if (!mVU.index) - memcpy(prog.data, mVU.regs().Micro, 0x1000); + if (!doWholeProgCompare) + { + auto cmpOffset = [&](void* x) { return (u8*)x + mVUrange.start; }; + memcpy(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (mVUrange.end - mVUrange.start)); + } else - memcpy(prog.data, mVU.regs().Micro, 0x4000); + { + if (!mVU.index) + memcpy(prog.data, mVU.regs().Micro, 0x1000); + else + memcpy(prog.data, mVU.regs().Micro, 0x4000); + } mVUdumpProg(mVU, prog); } @@ -260,9 +269,9 @@ void mVUprintUniqueRatio(microVU& mVU) } // Compare Cached microProgram to mVU.regs().Micro -__fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg) +__fi bool mVUcmpProg(microVU& mVU, microProgram& prog) { - if (cmpWholeProg) + if (doWholeProgCompare) { if (memcmp((u8*)prog.data, mVU.regs().Micro, mVU.microMemSize)) return false; @@ -271,16 +280,19 @@ __fi bool mVUcmpProg(microVU& mVU, microProgram& prog, const bool cmpWholeProg) { for (const auto& range : *prog.ranges) { - auto cmpOffset = [&](void* x) { return (u8*)x + range.start; }; +#if defined(PCSX2_DEVBUILD) || defined(_DEBUG) if ((range.start < 0) || (range.end < 0)) DevCon.Error("microVU%d: Negative Range![%d][%d]", mVU.index, range.start, range.end); +#endif + auto cmpOffset = [&](void* x) { return (u8*)x + range.start; }; + if (memcmp(cmpOffset(prog.data), cmpOffset(mVU.regs().Micro), (range.end - range.start))) return false; } } mVU.prog.cleared = 0; mVU.prog.cur = &prog; - mVU.prog.isSame = cmpWholeProg ? 1 : -1; + mVU.prog.isSame = doWholeProgCompare ? 1 : -1; return true; } @@ -296,7 +308,7 @@ _mVUt __fi void* mVUsearchProg(u32 startPC, uptr pState) std::deque::iterator it(list->begin()); for (; it != list->end(); ++it) { - bool b = mVUcmpProg(mVU, *it[0], 0); + bool b = mVUcmpProg(mVU, *it[0]); if (b) { diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 13d5fe53bc..f24db5d7b1 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -54,12 +54,15 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) pxFailDev("microVU: PC out of VU memory"); } + // The PC handling will prewrap the PC so we need to set the end PC to the end of the micro memory, but only if it wraps, no more. + const s32 cur_pc = (!isStartPC && mVUrange.start > pc && pc == 0) ? mVU.microMemSize : pc; + if (isStartPC) // Check if startPC is already within a block we've recompiled { std::deque::const_iterator it(ranges->begin()); for (; it != ranges->end(); ++it) { - if ((pc >= it[0].start) && (pc <= it[0].end)) + if ((cur_pc >= it[0].start) && (cur_pc <= it[0].end)) { if (it[0].start != it[0].end) { @@ -71,53 +74,51 @@ void mVUsetupRange(microVU& mVU, s32 pc, bool isStartPC) } } } - else if (mVUrange.end >= pc) + else if (mVUrange.end >= cur_pc) { // existing range covers more area than current PC so no need to process it return; } - - mVUcheckIsSame(mVU); + + if (doWholeProgCompare) + mVUcheckIsSame(mVU); if (isStartPC) { - microRange mRange = {pc, -1}; + microRange mRange = {cur_pc, -1}; ranges->push_front(mRange); return; } - if (mVUrange.start <= pc) + + if (mVUrange.start <= cur_pc) { - mVUrange.end = pc; - bool mergedRange = false; - s32 rStart = mVUrange.start; - s32 rEnd = mVUrange.end; + mVUrange.end = cur_pc; + s32& rStart = mVUrange.start; + s32& rEnd = mVUrange.end; std::deque::iterator it(ranges->begin()); - for (++it; it != ranges->end(); ++it) + it++; + for (;it != ranges->end();) { - if ((it[0].start >= rStart) && (it[0].start <= rEnd)) // Starts after this prog but starts before the end of current prog + if (((it->start >= rStart) && (it->start <= rEnd)) || ((it->end >= rStart) && (it->end <= rEnd))) // Starts after this prog but starts before the end of current prog { - it[0].start = std::min(it[0].start, rStart); // Choose the earlier start - mergedRange = true; + rStart = std::min(it->start, rStart); // Choose the earlier start + rEnd = std::max(it->end, rEnd); + it = ranges->erase(it); } - // Make sure we check both as the start on the other one may be later, we don't want to delete that - if ((it[0].end >= rStart) && (it[0].end <= rEnd)) // Ends after this prog starts but ends before this one ends - { - it[0].end = std::max(it[0].end, rEnd); // Extend the end of this prog to match this program - mergedRange = true; - } - } - if (mergedRange) - { - ranges->erase(ranges->begin()); + else + it++; } } else { mVUrange.end = mVU.microMemSize; - DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%d]", mVU.index, mVUrange.start, mVUrange.end); - microRange mRange = {0, pc}; + DevCon.WriteLn(Color_Green, "microVU%d: Prog Range Wrap [%04x] [%04x] PC %x", mVU.index, mVUrange.start, mVUrange.end, cur_pc); + microRange mRange = {0, cur_pc }; ranges->push_front(mRange); } + + if(!doWholeProgCompare) + mVUcacheProg(mVU, *mVU.prog.cur); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 41d34a203d..5244bcff8f 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -292,14 +292,14 @@ typedef u32 (*mVUCall)(void*, void*); //------------------------------------------------------------------ // Reg Alloc -static const bool doRegAlloc = true; // Set to false to flush every 32bit Instruction +static constexpr bool doRegAlloc = true; // Set to false to flush every 32bit Instruction // This turns off reg alloc for the most part, but reg alloc will still // be done within instructions... Also on doSwapOp() regAlloc is needed between // Lower and Upper instructions, so in this case it flushes after the full // 64bit instruction (lower and upper) // No Flag Optimizations -static const bool noFlagOpts = false; // Set to true to disable all flag setting optimizations +static constexpr bool noFlagOpts = false; // Set to true to disable all flag setting optimizations // Note: The flag optimizations this disables should all be harmless, so // this option is mainly just for debugging... it effectively forces mVU // to always update Mac and Status Flags (both sticky and non-sticky) whenever @@ -307,9 +307,9 @@ static const bool noFlagOpts = false; // Set to true to disable all flag setting // flag instances between blocks... // Multiple Flag Instances -static const bool doSFlagInsts = true; // Set to true to enable multiple status flag instances -static const bool doMFlagInsts = true; // Set to true to enable multiple mac flag instances -static const bool doCFlagInsts = true; // Set to true to enable multiple clip flag instances +static constexpr bool doSFlagInsts = true; // Set to true to enable multiple status flag instances +static constexpr bool doMFlagInsts = true; // Set to true to enable multiple mac flag instances +static constexpr bool doCFlagInsts = true; // Set to true to enable multiple clip flag instances // This is the correct behavior of the VU's. Due to the pipeline of the VU's // there can be up to 4 different instances of values to keep track of // for the 3 different types of flags: Status, Mac, Clip flags. @@ -317,27 +317,27 @@ static const bool doCFlagInsts = true; // Set to true to enable multiple clip // corresponding flag, which may be useful when debugging flag pipeline bugs. // Branch in Branch Delay Slots -static const bool doBranchInDelaySlot = true; // Set to true to enable evil-branches +static constexpr bool doBranchInDelaySlot = true; // Set to true to enable evil-branches // This attempts to emulate the correct behavior for branches in branch delay // slots. It is evil that games do this, and handling the different possible // cases is tricky and bug prone. If this option is disabled then the second // branch is treated as a NOP and effectively ignored. // Constant Propagation -static const bool doConstProp = false; // Set to true to turn on vi15 const propagation +static constexpr bool doConstProp = false; // Set to true to turn on vi15 const propagation // Enables Constant Propagation for Jumps based on vi15 'link-register' // allowing us to know many indirect jump target addresses. // Makes GoW a lot slower due to extra recompilation time and extra code-gen! // Indirect Jump Caching -static const bool doJumpCaching = true; // Set to true to enable jump caching +static constexpr bool doJumpCaching = true; // Set to true to enable jump caching // Indirect jumps (JR/JALR) will remember the entry points to their previously // jumped-to addresses. This allows us to skip the microBlockManager::search() // routine that is performed every indirect jump in order to find a block within a // program that matches the correct pipeline state. // Indirect Jumps are part of same cached microProgram -static const bool doJumpAsSameProgram = false; // Set to true to treat jumps as same program +static constexpr bool doJumpAsSameProgram = false; // Set to true to treat jumps as same program // Enabling this treats indirect jumps (JR/JALR) as part of the same microProgram // when determining the valid ranges for the microProgram cache. Disabling this // counts indirect jumps as separate cached microPrograms which generally leads @@ -347,11 +347,17 @@ static const bool doJumpAsSameProgram = false; // Set to true to treat jumps as // Note: You MUST disable doJumpCaching if you enable this option. // Handling of D-Bit in Micro Programs -static const bool doDBitHandling = false; +static constexpr bool doDBitHandling = false; // This flag shouldn't be enabled in released versions of games. Any games which // need this method of pausing the VU should be using the T-Bit instead, however // this could prove useful for VU debugging. +// Whole program comparison on search +static constexpr bool doWholeProgCompare = false; +// This shouldn't be needed and could inflate program generation. +// Compares the entire VU memory with the stored micro program's memory, regardless of if it's used. +// Generally slower but may be useful for debugging. + //------------------------------------------------------------------ // Speed Hacks (can cause infinite loops, SPS, Black Screens, etc...) //------------------------------------------------------------------