mirror of https://github.com/PCSX2/pcsx2.git
VU: Improve VU0/EE sync, Implement better M-Bit Handling, Fix VU program handling on VIF
parent 0354e5e710
commit df79a17baa
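Before the hunks themselves, a rough sketch of the scheduling model this commit moves the VUs to: each VU keeps its own cycle counter, and the EE only runs a VU for the time the VU has fallen behind by, instead of a fixed "continue" quantum (plus an optional fixed kickstart when the new VU0 Kickstart gamefix is enabled). This is a condensed, non-authoritative illustration using names that appear in the diff below; the wrapper function name is made up for the sketch and is not the exact PCSX2 code:

// Hypothetical condensation of the BaseVUmicroCPU::ExecuteBlock/ExecuteBlockJIT changes below.
// RunVUBehindEE is an illustrative name; the fields and helpers are the ones used in the diff.
void RunVUBehindEE(BaseVUmicroCPU* cpu)
{
    const u32 vuCycle = cpu->m_Idx ? VU1.cycle : VU0.cycle;  // per-VU cycle counter
    const s32 delta   = (s32)(u32)(cpuRegs.cycle - vuCycle); // how far the VU lags the EE

    if (delta > 0) {
        cpu->Execute(delta);          // run only the time the EE has already spent
        cpuSetNextEventDelta(delta);  // and re-check after roughly that long
    }
    else {
        cpuSetNextEventDelta(-delta); // VU is ahead (e.g. after a kickstart): wait for the EE
    }
}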
@@ -29,13 +29,13 @@ using namespace R5900::Interpreter;
void VCALLMS() {
    vu0Finish();
    vu0ExecMicro(((cpuRegs.code >> 6) & 0x7FFF));
    vif0Regs.stat.VEW = false;
    //vif0Regs.stat.VEW = false;
}

void VCALLMSR() {
    vu0Finish();
    vu0ExecMicro(VU0.VI[REG_CMSAR0].US[0]);
    vif0Regs.stat.VEW = false;
    //vif0Regs.stat.VEW = false;
}

void BC2F()

@@ -59,6 +59,7 @@ enum GamefixId
    Fix_GoemonTlbMiss,
    Fix_ScarfaceIbit,
    Fix_CrashTagTeamIbit,
    Fix_VU0Kickstart,

    GamefixId_COUNT
};

@@ -361,7 +362,8 @@ struct Pcsx2Config
    FMVinSoftwareHack : 1, // Toggle in and out of software rendering when an FMV runs.
    GoemonTlbHack : 1, // Gomeon tlb miss hack. The game need to access unmapped virtual address. Instead to handle it as exception, tlb are preloaded at startup
    ScarfaceIbit : 1, // Scarface I bit hack. Needed to stop constant VU recompilation
    CrashTagTeamRacingIbit : 1; // Crash Tag Team Racing I bit hack. Needed to stop constant VU recompilation
    CrashTagTeamRacingIbit : 1, // Crash Tag Team Racing I bit hack. Needed to stop constant VU recompilation
    VU0KickstartHack : 1; // Speed up VU0 at start of program to avoid some VU1 sync issues
    BITFIELD_END

    GamefixOptions();
@@ -267,7 +267,8 @@ const wxChar *const tbl_GamefixNames[] =
    L"FMVinSoftware",
    L"GoemonTlb",
    L"ScarfaceIbit",
    L"CrashTagTeamRacingIbit"
    L"CrashTagTeamRacingIbit",
    L"VU0Kickstart"
};

const __fi wxChar* EnumToString( GamefixId id )

@@ -330,7 +331,8 @@ void Pcsx2Config::GamefixOptions::Set( GamefixId id, bool enabled )
    case Fix_FMVinSoftware: FMVinSoftwareHack = enabled; break;
    case Fix_GoemonTlbMiss: GoemonTlbHack = enabled; break;
    case Fix_ScarfaceIbit: ScarfaceIbit = enabled; break;
    case Fix_CrashTagTeamIbit: CrashTagTeamRacingIbit = enabled; break;
    case Fix_CrashTagTeamIbit: CrashTagTeamRacingIbit = enabled; break;
    case Fix_VU0Kickstart: VU0KickstartHack = enabled; break;
    jNO_DEFAULT;
    }
}

@@ -356,7 +358,8 @@ bool Pcsx2Config::GamefixOptions::Get( GamefixId id ) const
    case Fix_FMVinSoftware: return FMVinSoftwareHack;
    case Fix_GoemonTlbMiss: return GoemonTlbHack;
    case Fix_ScarfaceIbit: return ScarfaceIbit;
    case Fix_CrashTagTeamIbit: return CrashTagTeamRacingIbit;
    case Fix_CrashTagTeamIbit: return CrashTagTeamRacingIbit;
    case Fix_VU0Kickstart: return VU0KickstartHack;
    jNO_DEFAULT;
    }
    return false; // unreachable, but we still need to suppress warnings >_<

@@ -382,7 +385,8 @@ void Pcsx2Config::GamefixOptions::LoadSave( IniInterface& ini )
    IniBitBool( FMVinSoftwareHack );
    IniBitBool( GoemonTlbHack );
    IniBitBool( ScarfaceIbit );
    IniBitBool( CrashTagTeamRacingIbit );
    IniBitBool( CrashTagTeamRacingIbit );
    IniBitBool( VU0KickstartHack );
}
@@ -141,7 +141,12 @@ struct __aligned16 VURegs {
    u32 branchpc;
    u32 delaybranchpc;
    bool takedelaybranch;
    u32 pending_q;
    u32 pending_p;

    __aligned16 u32 micro_macflags[4];
    __aligned16 u32 micro_clipflags[4];
    __aligned16 u32 micro_statusflags[4];
    // MAC/Status flags -- these are used by interpreters but are kind of hacky
    // and shouldn't be relied on for any useful/valid info. Would like to move them out of
    // this struct eventually.
@@ -58,17 +58,27 @@ __fi void _vu0run(bool breakOnMbit, bool addCycles) {

    if (!(VU0.VI[REG_VPU_STAT].UL & 1)) return;

    int startcycle = VU0.cycle;
    u32 runCycles = breakOnMbit ? vu0RunCycles : 0x7fffffff;
    VU0.flags &= ~VUFLAG_MFLAGSET;
    //VU0 is ahead of the EE and M-Bit is already encountered, so no need to wait for it, just catch up the EE
    if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && VU0.cycle >= cpuRegs.cycle)
    {
        cpuRegs.cycle = VU0.cycle;
        return;
    }

    u32 startcycle = VU0.cycle;
    u32 runCycles = 0x7fffffff;

    do { // Run VU until it finishes or M-Bit
        CpuVU0->Execute(runCycles);
    } while ((VU0.VI[REG_VPU_STAT].UL & 1) // E-bit Termination
        && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET))); // M-bit Break
        && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET) || VU0.cycle < cpuRegs.cycle)); // M-bit Break

    // Add cycles if called from EE's COP2
    if (addCycles) cpuRegs.cycle += (VU0.cycle-startcycle)*2;
    if (addCycles)
    {
        cpuRegs.cycle += (VU0.cycle - startcycle);
        VU0.cycle = cpuRegs.cycle;
    }
}

void _vu0WaitMicro() { _vu0run(1, 1); } // Runs VU0 Micro Until E-bit or M-Bit End

@@ -101,7 +111,7 @@ namespace OpcodeImpl

void QMFC2() {
    if (cpuRegs.code & 1) {
        _vu0WaitMicro();
        _vu0FinishMicro();
    }
    if (_Rt_ == 0) return;
    cpuRegs.GPR.r[_Rt_].UD[0] = VU0.VF[_Fs_].UD[0];

@@ -119,7 +129,7 @@ void QMTC2() {

void CFC2() {
    if (cpuRegs.code & 1) {
        _vu0WaitMicro();
        _vu0FinishMicro();
    }
    if (_Rt_ == 0) return;
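For readers skimming the interleaved old/new lines above, the reworked _vu0run() boils down to the following (reconstructed from the added lines of the hunk; treat it as a sketch rather than the literal function body): run VU0 until its program ends, or, when breakOnMbit is set, until an M-bit has been hit and VU0 has caught up to the EE, then optionally bill the elapsed VU0 time to the EE.

// Sketch of the new _vu0run(breakOnMbit, addCycles) flow, reconstructed from the hunk above.
if (!(VU0.VI[REG_VPU_STAT].UL & 1)) return;                      // VU0 idle, nothing to do

// VU0 already hit an M-bit and is ahead of the EE: just let the EE catch up.
if ((VU0.flags & VUFLAG_MFLAGSET) && breakOnMbit && VU0.cycle >= cpuRegs.cycle) {
    cpuRegs.cycle = VU0.cycle;
    return;
}

u32 startcycle = VU0.cycle;
do {
    CpuVU0->Execute(0x7fffffff);
} while ((VU0.VI[REG_VPU_STAT].UL & 1)                           // still running (no E-bit yet)
    && (!breakOnMbit || !(VU0.flags & VUFLAG_MFLAGSET)
        || VU0.cycle < cpuRegs.cycle));                          // M-bit break only once VU0 caught up

if (addCycles) {                                                 // called from the EE's COP2 path
    cpuRegs.cycle += (VU0.cycle - startcycle);                   // note: no longer doubled
    VU0.cycle = cpuRegs.cycle;
}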
@@ -44,7 +44,7 @@ void __fastcall vu0ExecMicro(u32 addr) {

    VU0.VI[REG_VPU_STAT].UL &= ~0xFF;
    VU0.VI[REG_VPU_STAT].UL |= 0x01;

    VU0.cycle = cpuRegs.cycle;
    if ((s32)addr != -1) VU0.VI[REG_TPC].UL = addr;
    _vuExecMicroDebug(VU0);
    CpuVU0->ExecuteBlock(1);

@@ -157,12 +157,12 @@ static void _vu0Exec(VURegs* VU)

            if(VU->takedelaybranch)
            {
                VU->branch = 2;
                DevCon.Warning("VU0 - Branch/Jump in Delay Slot");
                VU->branch = 1;
                DevCon.Warning("VU0 - Branch/Jump in Delay Slot");
                VU->branchpc = VU->delaybranchpc;
                VU->delaybranchpc = 0;
                VU->takedelaybranch = false;
            }
        }
    }
}

@@ -206,8 +206,9 @@ void InterpVU0::Step()
void InterpVU0::Execute(u32 cycles)
{
    VU0.VI[REG_TPC].UL <<= 3;
    for (int i = (int)cycles; i > 0 ; i--) {
        if (!(VU0.VI[REG_VPU_STAT].UL & 0x1)) {
    VU0.flags &= ~VUFLAG_MFLAGSET;
    for (int i = (int)cycles; i > 0; i--) {
        if (!(VU0.VI[REG_VPU_STAT].UL & 0x1) || (VU0.flags & VUFLAG_MFLAGSET)) {
            if (VU0.branch || VU0.ebit) {
                vu0Exec(&VU0); // run branch delay slot?
            }

@@ -217,4 +218,3 @@ void InterpVU0::Execute(u32 cycles)
    }
    VU0.VI[REG_TPC].UL >>= 3;
}
@@ -57,10 +57,9 @@ void __fastcall vu1ExecMicro(u32 addr)
    vu1Finish();

    VUM_LOG("vu1ExecMicro %x (count=%d)", addr, count++);

    VU1.cycle = cpuRegs.cycle;
    VU0.VI[REG_VPU_STAT].UL &= ~0xFF00;
    VU0.VI[REG_VPU_STAT].UL |= 0x0100;

    if ((s32)addr != -1) VU1.VI[REG_TPC].UL = addr;
    _vuExecMicroDebug(VU1);

@@ -157,7 +157,7 @@ static void _vu1Exec(VURegs* VU)

            if(VU->takedelaybranch)
            {
                VU->branch = 2;
                VU->branch = 1;
                //DevCon.Warning("VU1 - Branch/Jump in Delay Slot");
                VU->branchpc = VU->delaybranchpc;
                VU->delaybranchpc = 0;
@@ -25,27 +25,29 @@
void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
    const u32& stat = VU0.VI[REG_VPU_STAT].UL;
    const int test = m_Idx ? 0x100 : 1;
    const int s = 1024*8; // Kick Start Cycles (Silver Surfer needs this amount)
    const int c = 1024*1; // Continue Cycles
    const int s = EmuConfig.Gamefixes.VU0KickstartHack ? 2048 : 0; // Kick Start Cycles (Silver Surfer, POP:SOT, Lotus needs this amount)

    if (!(stat & test)) return;
    if (startUp) { // Start Executing a microprogram

    if (startUp && s) { // Start Executing a microprogram
        Execute(s); // Kick start VU

        // Let VUs run behind EE instead of ahead
        if (stat & test) {
            cpuSetNextEventDelta((s+c)*2);
            m_lastEEcycles = cpuRegs.cycle + (s*2);
            cpuSetNextEventDelta(s);

            if (m_Idx)
                VU1.cycle = cpuRegs.cycle;
            else
                VU0.cycle = cpuRegs.cycle;
        }
    }
    else { // Continue Executing (VU roughly half the mhz of EE)
        s32 delta = (s32)(u32)(cpuRegs.cycle - m_lastEEcycles) & ~1;
        if (delta > 0) { // Enough time has passed
            delta >>= 1; // Divide by 2 (unsigned)
    else { // Continue Executing
        u32 cycle = m_Idx ? VU1.cycle : VU0.cycle;
        s32 delta = (s32)(u32)(cpuRegs.cycle - cycle);
        if (delta > 0) { // Enough time has passed
            Execute(delta); // Execute the time since the last call
            if (stat & test) {
                cpuSetNextEventDelta(c*2);
                m_lastEEcycles = cpuRegs.cycle;
            }
            if (stat & test)
                cpuSetNextEventDelta(delta);
        }
        else cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start
    }

@@ -55,10 +57,10 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) {
// EE data to VU0's registers. We want to run VU0 Micro right after this
// to ensure that the register is used at the correct time.
// This fixes spinning/hanging in some games like Ratchet and Clank's Intro.
void __fastcall BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) {
void BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) {
    const u32& stat = VU0.VI[REG_VPU_STAT].UL;
    const int test = cpu->m_Idx ? 0x100 : 1;
    const int c = 128; // VU Execution Cycles

    if (stat & test) { // VU is running
#ifdef PCSX2_DEVBUILD
        static int warn = 5;

@@ -67,10 +69,17 @@ void __fastcall BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) {
            warn--;
        }
#endif
        cpu->Execute(c); // Execute VU
        if (stat & test) {
            cpu->m_lastEEcycles+=(c*2);
            cpuSetNextEventDelta(c*2);

        u32 cycle = cpu->m_Idx ? VU1.cycle : VU0.cycle;
        s32 delta = (s32)(u32)(cpuRegs.cycle - cycle);
        if (delta > 0) { // Enough time has passed
            cpu->Execute(delta); // Execute the time since the last call
            if (stat & test) {
                cpuSetNextEventDelta(delta);
            }
        }
        else {
            cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start
        }
    }
}
@@ -262,6 +262,7 @@ extern BaseVUmicroCPU* CpuVU1;
extern void vu0ResetRegs();
extern void __fastcall vu0ExecMicro(u32 addr);
extern void vu0Exec(VURegs* VU);
extern void _vu0FinishMicro();
extern void vu0Finish();
extern void iDumpVU0Registers();
@@ -174,10 +174,16 @@ __fi void vif0Interrupt()

    if (!(vif0ch.chcr.STR)) Console.WriteLn("vif0 running when CHCR == %x", vif0ch.chcr._u32);

    if(vif0.waitforvu)
    {
        //CPU_INT(DMAC_VIF0, 16);
        return;
    }

    if (vif0.irq && vif0.vifstalled.enabled && vif0.vifstalled.value == VIF_IRQ_STALL)
    {
        vif0Regs.stat.INT = true;

        //Yakuza watches VIF_STAT so lets do this here.
        if (((vif0Regs.code >> 24) & 0x7f) != 0x7) {
            vif0Regs.stat.VIS = true;

@@ -193,7 +199,7 @@ __fi void vif0Interrupt()
        // One game doesn't like vif stalling at end, can't remember what. Spiderman isn't keen on it tho
        //vif0ch.chcr.STR = false;
        vif0Regs.stat.FQC = std::min((u16)0x8, vif0ch.qwc);
        if(vif0ch.qwc > 0 || !vif0.done)
        if (vif0ch.qwc > 0 || !vif0.done)
        {
            VIF_LOG("VIF0 Stalled");
            return;

@@ -201,13 +207,6 @@
        }
    }

    if(vif0.waitforvu)
    {
        //DevCon.Warning("Waiting on VU0");
        //CPU_INT(DMAC_VIF0, 16);
        return;
    }

    vif0.vifstalled.enabled = false;

    //Must go after the Stall, incase it's still in progress, GTC africa likes to see it still transferring.
@@ -36,7 +36,7 @@ vifOp(vifCode_Null);

__ri void vifExecQueue(int idx)
{
    if (!GetVifX.queued_program)
    if (!GetVifX.queued_program || (VU0.VI[REG_VPU_STAT].UL & 1 << (idx * 8)))
        return;

    GetVifX.queued_program = false;

@@ -59,6 +59,8 @@ __ri void vifExecQueue(int idx)
}

static __fi void vifFlush(int idx) {
    vifExecQueue(idx);

    if (!idx) vif0FLUSH();
    else vif1FLUSH();

@@ -119,6 +121,7 @@ void ExecuteVU(int idx)
        vifX.cmd = 0;
        vifX.pass = 0;
    }
    vifExecQueue(idx);
}

//------------------------------------------------------------------
@@ -49,7 +49,7 @@ _vifT void vifTransferLoop(u32* &data) {
    vifX.cmd = data[0] >> 24;

    //VIF_LOG("New VifCMD %x tagsize %x", vifX.cmd, vifX.tag.size);
    VIF_LOG("New VifCMD %x tagsize %x irq %d", vifX.cmd, vifX.tag.size, vifX.irq);
    if (IsDevBuild && SysTrace.EE.VIFcode.IsActive()) {
        // Pass 2 means "log it"
        vifCmdHandler[idx][vifX.cmd & 0x7f](2, data);
@@ -104,9 +104,13 @@ Panels::GameFixesPanel::GameFixesPanel( wxWindow* parent )
        _("VU I bit Hack avoid constant recompilation (Scarface The World Is Yours)"),
        wxEmptyString
    },
    {
    {
        _("VU I bit Hack avoid constant recompilation (Crash Tag Team Racing)"),
        wxEmptyString
        wxEmptyString
    },
    {
        _("VU0 Kickstart to avoid sync problems with VU1"),
        wxEmptyString
    }
};
@@ -74,6 +74,7 @@ void SetBranchImm( u32 imm );
void iFlushCall(int flushtype);
void recBranchCall( void (*func)() );
void recCall( void (*func)() );
u32 scaleblockcycles_clear();

namespace R5900{
namespace Dynarec {
@@ -1027,6 +1027,31 @@ static u32 scaleblockcycles()

    return scaled;
}
u32 scaleblockcycles_clear()
{
    u32 scaled = scaleblockcycles_calculation();

#if 0 // Enable this to get some runtime statistics about the scaling result in practice
    static u32 scaled_overall = 0, unscaled_overall = 0;
    if (g_resetEeScalingStats)
    {
        scaled_overall = unscaled_overall = 0;
        g_resetEeScalingStats = false;
    }
    u32 unscaled = DEFAULT_SCALED_BLOCKS();
    if (!unscaled) unscaled = 1;

    scaled_overall += scaled;
    unscaled_overall += unscaled;
    float ratio = static_cast<float>(unscaled_overall) / scaled_overall;

    DevCon.WriteLn(L"Unscaled overall: %d, scaled overall: %d, relative EE clock speed: %d %%",
        unscaled_overall, scaled_overall, static_cast<int>(100 * ratio));
#endif
    s_nBlockCycles &= 0x7;

    return scaled;
}

// Generates dynarec code for Event tests followed by a block dispatch (branch).
// Parameters:
@@ -573,6 +573,14 @@ void recSWC1()

void recLQC2()
{
    iFlushCall(FLUSH_EVERYTHING);
    xMOV(eax, ptr[&cpuRegs.cycle]);
    xADD(eax, scaleblockcycles_clear());
    xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
    xLoadFarAddr(arg1reg, CpuVU0);
    xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
    iFlushCall(FLUSH_EVERYTHING);

    if (_Rt_)
        xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
    else

@@ -602,6 +610,14 @@ void recLQC2()

void recSQC2()
{
    iFlushCall(FLUSH_EVERYTHING);
    xMOV(eax, ptr[&cpuRegs.cycle]);
    xADD(eax, scaleblockcycles_clear());
    xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
    xLoadFarAddr(arg1reg, CpuVU0);
    xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
    iFlushCall(FLUSH_EVERYTHING);

    xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);

    if (GPR_IS_CONST1(_Rs_))

@@ -628,4 +644,4 @@ void recSQC2()
} } } // end namespace R5900::Dynarec::OpcodeImpl

using namespace R5900::Dynarec;
using namespace R5900::Dynarec::OpcodeImpl;
using namespace R5900::Dynarec::OpcodeImpl;
@@ -351,8 +351,11 @@ void recMicroVU1::Reset() {
void recMicroVU0::Execute(u32 cycles) {
    pxAssert(m_Reserved); // please allocate me first! :|

    VU0.flags &= ~VUFLAG_MFLAGSET;

    if(!(VU0.VI[REG_VPU_STAT].UL & 1)) return;
    VU0.VI[REG_TPC].UL <<= 3;

    // Sometimes games spin on vu0, so be careful with this value
    // woody hangs if too high on sVU (untested on mVU)
    // Edit: Need to test this again, if anyone ever has a "Woody" game :p
@@ -70,12 +70,36 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) {
        xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ);
    }

    // Save Flag Instances
    xMOV(ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL], getFlagReg(fStatus));
    // Save MAC, Status and CLIP Flag Instances
    xMOV(ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL], getFlagReg(fStatus));
    mVUallocMFLAGa(mVU, gprT1, fMac);
    mVUallocCFLAGa(mVU, gprT2, fClip);
    xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1);
    xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);
    xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1);
    xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);

    if (!isEbit) { // Backup flag instances
        xMOVAPS(xmmT1, ptr128[mVU.macFlag]);
        xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1);
        xMOVAPS(xmmT1, ptr128[mVU.clipFlag]);
        xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1);

        xMOV(ptr32[&mVU.regs().micro_statusflags[0]], gprF0);
        xMOV(ptr32[&mVU.regs().micro_statusflags[1]], gprF1);
        xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2);
        xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3);
    } else { // Flush flag instances
        xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
        xSHUF.PS(xmmT1, xmmT1, 0);
        xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1);

        xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL]);
        xSHUF.PS(xmmT1, xmmT1, 0);
        xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1);

        xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]);
        xSHUF.PS(xmmT1, xmmT1, 0);
        xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
    }

    if (isEbit || isVU1) { // Clear 'is busy' Flags
        if (!mVU.index || !THREAD_VU1) {

@@ -98,7 +122,12 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
    int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit);
    int qInst = 0;
    int pInst = 0;
    mVU.regAlloc->flushAll();
    microBlock stateBackup;
    memcpy(&stateBackup, &mVUregs, sizeof(mVUregs)); //backup the state, it's about to get screwed with.
    if(!isEbit)
        mVU.regAlloc->TDwritebackAll(); //Writing back ok, invalidating early kills the rec, so don't do it :P
    else
        mVU.regAlloc->flushAll();

    if (isEbit) {
        memzero(mVUinfo);

@@ -124,20 +153,55 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
    }

    // Save P/Q Regs
    if (qInst) { xPSHUF.D(xmmPQ, xmmPQ, 0xe5); }
    if (qInst) { xPSHUF.D(xmmPQ, xmmPQ, 0xe1); }
    xMOVSS(ptr32[&mVU.regs().VI[REG_Q].UL], xmmPQ);
    xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
    xMOVSS(ptr32[&mVU.regs().pending_q], xmmPQ);
    xPSHUF.D(xmmPQ, xmmPQ, 0xe1);

    if (isVU1) {
        xPSHUF.D(xmmPQ, xmmPQ, pInst ? 3 : 2);
        xPSHUF.D(xmmPQ, xmmPQ, pInst ? 0x1b : 0x1e);
        xMOVSS(ptr32[&mVU.regs().VI[REG_P].UL], xmmPQ);
        xPSHUF.D(xmmPQ, xmmPQ, pInst ? 0x1b : 0x4b);

        xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
        xMOVSS(ptr32[&mVU.regs().pending_p], xmmPQ);
        xPSHUF.D(xmmPQ, xmmPQ, 0x1b);
    }

    // Save Flag Instances
    // Save MAC, Status and CLIP Flag Instances
    xMOV(ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL], getFlagReg(fStatus));
    mVUallocMFLAGa(mVU, gprT1, fMac);
    mVUallocCFLAGa(mVU, gprT2, fClip);
    xMOV(ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL], gprT1);
    xMOV(ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL], gprT2);

    if (!isEbit) { // Backup flag instances
        xMOVAPS(xmmT1, ptr128[mVU.macFlag]);
        xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1);
        xMOVAPS(xmmT1, ptr128[mVU.clipFlag]);
        xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1);

        xMOV(ptr32[&mVU.regs().micro_statusflags[0]], gprF0);
        xMOV(ptr32[&mVU.regs().micro_statusflags[1]], gprF1);
        xMOV(ptr32[&mVU.regs().micro_statusflags[2]], gprF2);
        xMOV(ptr32[&mVU.regs().micro_statusflags[3]], gprF3);
    }
    else { // Flush flag instances
        xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
        xSHUF.PS(xmmT1, xmmT1, 0);
        xMOVAPS(ptr128[&mVU.regs().micro_clipflags], xmmT1);

        xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_MAC_FLAG].UL]);
        xSHUF.PS(xmmT1, xmmT1, 0);
        xMOVAPS(ptr128[&mVU.regs().micro_macflags], xmmT1);

        xMOVDZX(xmmT1, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]);
        xSHUF.PS(xmmT1, xmmT1, 0);
        xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
    }

    if (isEbit || isVU1) { // Clear 'is busy' Flags
        if (!mVU.index || !THREAD_VU1) {
            xAND(ptr32[&VU0.VI[REG_VPU_STAT].UL], (isVU1 ? ~0x100 : ~0x001)); // VBS0/VBS1 flag

@@ -149,6 +213,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
        xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], xPC);
        xJMP(mVU.exitFunct);
    }
    memcpy(&mVUregs, &stateBackup, sizeof(mVUregs)); //Restore the state for the rest of the recompile
}

// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
@@ -356,7 +356,7 @@ void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC) {

// vu0 is allowed to exit early, so are dev builds (for inf loops)
__fi bool doEarlyExit(microVU& mVU) {
    return IsDevBuild || !isVU1;
    return true;// IsDevBuild || !isVU1;
}

// Saves Pipeline State for resuming from early exits

@@ -368,27 +368,32 @@ __fi void mVUsavePipelineState(microVU& mVU) {
}

// Test cycles to see if we need to exit-early...
void mVUtestCycles(microVU& mVU) {
void mVUtestCycles(microVU& mVU, microFlagCycles& mFC) {
    iPC = mVUstartPC;
    if (doEarlyExit(mVU)) {
        xCMP(ptr32[&mVU.cycles], 0);
        xForwardJG32 skip;
        xMOV(eax, ptr32[&mVU.cycles]);
        if (!EmuConfig.Gamefixes.VU0KickstartHack)
            xSUB(eax, mVUcycles); // Running behind, make sure we have time to run the block
        else
            xSUB(eax, 1); // Running ahead, make sure cycles left are above 0
        xCMP(eax, 0);
        xForwardJGE32 skip;
        mVUsavePipelineState(mVU);
        if (isVU0) {
            // TEST32ItoM((uptr)&mVU.regs().flags, VUFLAG_MFLAGSET);
            // xFowardJZ32 vu0jmp;
            // mVUbackupRegs(mVU, true);
            // xFastCall(mVUwarning0, mVU.prog.cur->idx, xPC); // VU0 is allowed early exit for COP2 Interlock Simulation
            // mVUrestoreRegs(mVU, true);
            mVUsavePipelineState(mVU);
            mVUendProgram(mVU, NULL, 0);
            mVUendProgram(mVU, &mFC, 0);
            // vu0jmp.SetTarget();
        }
        else {
            mVUbackupRegs(mVU, true);
            /*mVUbackupRegs(mVU, true);
            xFastCall(mVUwarning1, mVU.prog.cur->idx, xPC);
            mVUrestoreRegs(mVU, true);
            mVUsavePipelineState(mVU);
            mVUendProgram(mVU, NULL, 0);
            mVUsavePipelineState(mVU);*/
            mVUendProgram(mVU, &mFC, 0);
        }
        skip.SetTarget();
    }

@@ -401,7 +406,7 @@ void mVUtestCycles(microVU& mVU) {

// This gets run at the start of every loop of mVU's first pass
__fi void startLoop(mV) {
    if (curI & _Mbit_) { DevCon.WriteLn (Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); }
    if (curI & _Mbit_ && isVU0) { DevCon.WriteLn (Color_Green, "microVU%d: M-bit set! PC = %x", getIndex, xPC); }
    if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set! PC = %x", getIndex, xPC); }
    if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set! PC = %x", getIndex, xPC); }
    memzero(mVUinfo);
@@ -475,8 +480,8 @@ void* mVUcompileSingleInstruction(microVU& mVU, u32 startPC, uptr pState, microF
    mVUsetCycles(mVU);
    mVUinfo.readQ = mVU.q;
    mVUinfo.writeQ = !mVU.q;
    mVUinfo.readP = mVU.p;
    mVUinfo.writeP = !mVU.p;
    mVUinfo.readP = mVU.p && isVU1;
    mVUinfo.writeP = !mVU.p && isVU1;
    mVUcount++;
    mVUsetFlagInfo(mVU);
    incPC(1);

@@ -485,7 +490,8 @@
    mVUsetFlags(mVU, mFC); // Sets Up Flag instances
    mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
    mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging...
    mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary

    mVUtestCycles(mVU, mFC); // Update VU Cycles and Exit Early if Necessary

    // Second Pass
    iPC = startPC / 4;
@@ -534,37 +540,52 @@ void mVUSaveFlags(microVU& mVU,microFlagCycles &mFC, microFlagCycles &mFCBackup)
    memcpy(&mFCBackup, &mFC, sizeof(microFlagCycles));
    mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances
}
void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {

void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
{
    microFlagCycles mFC;
    u8* thisPtr = x86Ptr;
    const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU.microMemSize / 8);
    u8* thisPtr = x86Ptr;
    const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU.microMemSize / 8);

    // First Pass
    iPC = startPC / 4;
    mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
    mVU.regAlloc->reset(); // Reset regAlloc
    mVU.regAlloc->reset(); // Reset regAlloc
    mVUinitFirstPass(mVU, pState, thisPtr);
    mVUbranch = 0;
    for(int branch = 0; mVUcount < endCount;) {
    for (int branch = 0; mVUcount < endCount;) {
        incPC(1);
        startLoop(mVU);
        mVUincCycles(mVU, 1);
        mVUopU(mVU, 0);
        mVUcheckBadOp(mVU);
        if (curI & _Ebit_) { eBitPass1(mVU, branch); }

        if (curI & _Mbit_) { mVUup.mBit = true; }

        if (curI & _Ibit_) { mVUlow.isNOP = true; mVUup.iBit = true; }
        else { incPC(-1); mVUopL(mVU, 0); incPC(1); }
        if (curI & _Dbit_) { mVUup.dBit = true; }
        if (curI & _Tbit_) { mVUup.tBit = true; }
        if (curI & _Ebit_) {
            eBitPass1(mVU, branch);
        }

        if ((curI & _Mbit_) && isVU0) {
            mVUup.mBit = true;
        }

        if (curI & _Ibit_) {
            mVUlow.isNOP = true;
            mVUup.iBit = true;
        }
        else {
            incPC(-1);
            mVUopL(mVU, 0);
            incPC(1);
        }
        if (curI & _Dbit_) {
            mVUup.dBit = true;
        }
        if (curI & _Tbit_) {
            mVUup.tBit = true;
        }
        mVUsetCycles(mVU);
        mVUinfo.readQ = mVU.q;
        mVUinfo.readQ = mVU.q;
        mVUinfo.writeQ = !mVU.q;
        mVUinfo.readP = mVU.p;
        mVUinfo.writeP = !mVU.p;
        mVUinfo.readP = mVU.p && isVU1;
        mVUinfo.writeP = !mVU.p && isVU1;
        mVUcount++;

        if (branch >= 2) {

@@ -588,6 +609,9 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {
            mVUbranch = 0;
        }

        if (mVUup.mBit && !branch && !mVUup.eBit)
            break;

        if (mVUinfo.isEOB)
            break;

@@ -595,24 +619,30 @@
    }

    // Fix up vi15 const info for propagation through blocks
    mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
    mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
    mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0;

    mVUsetFlags(mVU, mFC); // Sets Up Flag instances
    mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
    mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging...
    mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary
    mVUtestCycles(mVU, mFC); // Update VU Cycles and Exit Early if Necessary

    // Second Pass
    iPC = mVUstartPC;
    setCode();
    mVUbranch = 0;
    u32 x = 0;
    for( ; x < endCount; x++) {
        if (mVUinfo.isEOB) { handleBadOp(mVU, x); x = 0xffff; } // handleBadOp currently just prints a warning
        if (mVUup.mBit) { xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); }

    for (; x < endCount; x++) {
        if (mVUinfo.isEOB) {
            handleBadOp(mVU, x);
            x = 0xffff;
        } // handleBadOp currently just prints a warning
        if (mVUup.mBit) {
            xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET);
        }
        mVUexecuteInstruction(mVU);
        if(!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed.
        if (!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed.
        {
            if (mVUup.tBit) {
                mVUDoTBit(mVU, &mFC);

@@ -620,6 +650,13 @@
            else if (mVUup.dBit && doDBitHandling) {
                mVUDoDBit(mVU, &mFC);
            }
            else if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB) {
                mVUsetupRange(mVU, xPC, false);
                incPC(2);
                mVUendProgram(mVU, &mFC, 0);
                incPC(-2);
                goto perf_and_return;
            }
        }

        if (mVUinfo.doXGKICK) {

@@ -640,22 +677,41 @@
            incPC(-3); // Go back to branch opcode

            switch (mVUlow.branch) {
                case 1: case 2: normBranch(mVU, mFC); goto perf_and_return; // B/BAL
                case 9: case 10: normJump (mVU, mFC); goto perf_and_return; // JR/JALR
                case 3: condBranch(mVU, mFC, Jcc_Equal); goto perf_and_return; // IBEQ
                case 4: condBranch(mVU, mFC, Jcc_GreaterOrEqual); goto perf_and_return; // IBGEZ
                case 5: condBranch(mVU, mFC, Jcc_Greater); goto perf_and_return; // IBGTZ
                case 6: condBranch(mVU, mFC, Jcc_LessOrEqual); goto perf_and_return; // IBLEQ
                case 7: condBranch(mVU, mFC, Jcc_Less); goto perf_and_return; // IBLTZ
                case 8: condBranch(mVU, mFC, Jcc_NotEqual); goto perf_and_return; // IBNEQ
                case 1: // B/BAL
                case 2:
                    normBranch(mVU, mFC);
                    goto perf_and_return;
                case 9: // JR/JALR
                case 10:
                    normJump(mVU, mFC);
                    goto perf_and_return;
                case 3: // IBEQ
                    condBranch(mVU, mFC, Jcc_Equal);
                    goto perf_and_return;
                case 4: // IBGEZ
                    condBranch(mVU, mFC, Jcc_GreaterOrEqual);
                    goto perf_and_return;
                case 5: // IBGTZ
                    condBranch(mVU, mFC, Jcc_Greater);
                    goto perf_and_return;
                case 6: // IBLEQ
                    condBranch(mVU, mFC, Jcc_LessOrEqual);
                    goto perf_and_return;
                case 7: // IBLTZ
                    condBranch(mVU, mFC, Jcc_Less);
                    goto perf_and_return;
                case 8: // IBNEQ
                    condBranch(mVU, mFC, Jcc_NotEqual);
                    goto perf_and_return;
            }

        }
    }
    if ((x == endCount) && (x!=1)) { Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); }
    if ((x == endCount) && (x != 1)) {
        Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index);
    }

    // E-bit End
    mVUsetupRange(mVU, xPC-8, false);
    mVUsetupRange(mVU, xPC - 8, false);
    mVUendProgram(mVU, &mFC, 1);

perf_and_return:
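For orientation, the M-bit handling threaded through mVUcompile above reduces to the pattern sketched here (reconstructed from the added lines of the hunks; a sketch of intent, not the exact recompiler source): on VU0 the first pass stops scanning at an M-bit instruction, and the second pass marks the flag for the EE and ends the program there so COP2 can resynchronise.

// First pass (per instruction): only VU0 tracks the M-bit, and it ends the block scan early.
if ((curI & _Mbit_) && isVU0) { mVUup.mBit = true; }
if (mVUup.mBit && !branch && !mVUup.eBit) break;

// Second pass (per instruction): record the M-bit for the EE side...
if (mVUup.mBit) { xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); }

// ...and, unless an E-bit or end-of-block already terminates the program, close it at the
// M-bit (this sits in the T-bit/D-bit handling chain in the real code):
if (mVUup.mBit && !mVUup.eBit && !mVUinfo.isEOB) {
    mVUsetupRange(mVU, xPC, false);
    incPC(2);
    mVUendProgram(mVU, &mFC, 0);    // end the block here so VU0 and the EE can resync
    incPC(-2);
    goto perf_and_return;
}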
@@ -34,22 +34,35 @@ void mVUdispatcherAB(mV) {
    xLDMXCSR(g_sseVUMXCSR);

    // Load Regs
    xMOV(gprF0, ptr32[&mVU.regs().VI[REG_STATUS_FLAG].UL]);
    xMOV(gprF1, gprF0);
    xMOV(gprF2, gprF0);
    xMOV(gprF3, gprF0);

    xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_MAC_FLAG].UL]);
    xSHUF.PS(xmmT1, xmmT1, 0);
    xMOVAPS (ptr128[mVU.macFlag], xmmT1);

    xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_CLIP_FLAG].UL]);
    xSHUF.PS(xmmT1, xmmT1, 0);
    xMOVAPS (ptr128[mVU.clipFlag], xmmT1);

    xMOVAPS (xmmT1, ptr128[&mVU.regs().VI[REG_P].UL]);
    xMOVAPS (xmmPQ, ptr128[&mVU.regs().VI[REG_Q].UL]);
    xMOVDZX (xmmT2, ptr32[&mVU.regs().pending_q]);
    xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
    //Load in other Q instance
    xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
    xMOVSS(xmmPQ, xmmT2);
    xPSHUF.D(xmmPQ, xmmPQ, 0xe1);

    if (isVU1)
    {
        //Load in other P instance
        xMOVDZX(xmmT2, ptr32[&mVU.regs().pending_p]);
        xPSHUF.D(xmmPQ, xmmPQ, 0x1B);
        xMOVSS(xmmPQ, xmmT2);
        xPSHUF.D(xmmPQ, xmmPQ, 0x1B);
    }

    xMOVAPS(xmmT1, ptr128[&mVU.regs().micro_macflags]);
    xMOVAPS(ptr128[mVU.macFlag], xmmT1);

    xMOVAPS(xmmT1, ptr128[&mVU.regs().micro_clipflags]);
    xMOVAPS(ptr128[mVU.clipFlag], xmmT1);

    xMOV(gprF0, ptr32[&mVU.regs().micro_statusflags[0]]);
    xMOV(gprF1, ptr32[&mVU.regs().micro_statusflags[1]]);
    xMOV(gprF2, ptr32[&mVU.regs().micro_statusflags[2]]);
    xMOV(gprF3, ptr32[&mVU.regs().micro_statusflags[3]]);

    // Jump to Recompiled Code Block
    xJMP(rax);
@@ -247,8 +247,14 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
//------------------------------------------------------------------

void COP2_Interlock(bool mBitSync) {

    if (cpuRegs.code & 1) {
        iFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
        iFlushCall(FLUSH_EVERYTHING);
        xMOV(eax, ptr[&cpuRegs.cycle]);
        xADD(eax, scaleblockcycles_clear());
        xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
        xLoadFarAddr(arg1reg, CpuVU0);
        xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
        if (mBitSync) xFastCall((void*)_vu0WaitMicro);
        else xFastCall((void*)_vu0FinishMicro);
    }

@@ -268,6 +274,14 @@ static void recCFC2() {

    COP2_Interlock(false);
    if (!_Rt_) return;
    if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) {
        iFlushCall(FLUSH_EVERYTHING);
        xMOV(eax, ptr[&cpuRegs.cycle]);
        xADD(eax, scaleblockcycles_clear());
        xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
        xLoadFarAddr(arg1reg, CpuVU0);
        xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
    }
    iFlushCall(FLUSH_EVERYTHING);

    if (_Rd_ == REG_STATUS_FLAG) { // Normalize Status Flag

@@ -331,6 +345,14 @@ static void recCTC2() {
    printCOP2("CTC2");
    COP2_Interlock(1);
    if (!_Rd_) return;
    if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) {
        iFlushCall(FLUSH_EVERYTHING);
        xMOV(eax, ptr[&cpuRegs.cycle]);
        xADD(eax, scaleblockcycles_clear());
        xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
        xLoadFarAddr(arg1reg, CpuVU0);
        xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
    }
    iFlushCall(FLUSH_EVERYTHING);

    switch(_Rd_) {

@@ -342,12 +364,25 @@ static void recCTC2() {
            xMOV(ptr32[&vu0Regs.VI[REG_R].UL], eax);
            break;
        case REG_STATUS_FLAG:
        {
            if (_Rt_) { // Denormalizes flag into eax (gprT1)
                mVUallocSFLAGd(&cpuRegs.GPR.r[_Rt_].UL[0]);
                xMOV(ptr32[&vu0Regs.VI[_Rd_].UL], eax);
            }
            else xMOV(ptr32[&vu0Regs.VI[_Rd_].UL], 0);
            __aligned16 u32 sticky_flags[4] = { 0xFC0,0xFC0,0xFC0,0xFC0 };
            __aligned16 u32 status_flags[4] = { 0x3F,0x3F,0x3F,0x3F };

            //Need to update the sticky flags for microVU
            xMOVDZX(xmmT1, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
            xSHUF.PS(xmmT1, xmmT1, 0);
            xAND.PS(xmmT1, ptr128[&sticky_flags]);
            xMOVAPS(xmmT2, ptr128[&VU0.micro_statusflags]);
            xAND.PS(xmmT1, ptr128[&status_flags]);
            xOR.PS(xmmT1, xmmT2);
            xMOVAPS(ptr128[&VU0.micro_statusflags], xmmT1);
            break;
        }
        case REG_CMSAR1: // Execute VU1 Micro SubRoutine
            if (_Rt_) {
                xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);

@@ -357,8 +392,8 @@ static void recCTC2() {
            xFastCall((void*)vif1VUFinish);
            break;
        case REG_FBRST:
            if (!_Rt_) {
                xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], 0);
            if (!_Rt_) {
                xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], 0);
                return;
            }
            else xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);

@@ -373,8 +408,6 @@
            // Executing vu0 block here fixes the intro of Ratchet and Clank
            // sVU's COP2 has a comment that "Donald Duck" needs this too...
            if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
            xLoadFarAddr(arg1reg, CpuVU0);
            xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
            break;
    }
}
@@ -384,6 +417,15 @@ static void recQMFC2() {
    printCOP2("QMFC2");
    COP2_Interlock(false);
    if (!_Rt_) return;

    if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) {
        iFlushCall(FLUSH_EVERYTHING);
        xMOV(eax, ptr[&cpuRegs.cycle]);
        xADD(eax, scaleblockcycles_clear());
        xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
        xLoadFarAddr(arg1reg, CpuVU0);
        xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
    }
    iFlushCall(FLUSH_EVERYTHING);

    // FixMe: For some reason this line is needed or else games break:

@@ -398,6 +440,14 @@ static void recQMTC2() {
    printCOP2("QMTC2");
    COP2_Interlock(true);
    if (!_Rd_) return;
    if (!(cpuRegs.code & 1) && !EmuConfig.Gamefixes.VU0KickstartHack) {
        iFlushCall(FLUSH_EVERYTHING);
        xMOV(eax, ptr[&cpuRegs.cycle]);
        xADD(eax, scaleblockcycles_clear());
        xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
        xLoadFarAddr(arg1reg, CpuVU0);
        xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
    }
    iFlushCall(FLUSH_EVERYTHING);

    xMOVAPS(xmmT1, ptr128[&cpuRegs.GPR.r[_Rt_]]);

@@ -468,5 +518,11 @@ namespace R5900 {
namespace Dynarec {
namespace OpcodeImpl { void recCOP2() { recCOP2t[_Rs_](); }}}}
void recCOP2_BC2 () { recCOP2_BC2t[_Rt_](); }
void recCOP2_SPEC1() { recCOP2SPECIAL1t[_Funct_](); }
void recCOP2_SPEC2() { recCOP2SPECIAL2t[(cpuRegs.code&3)|((cpuRegs.code>>4)&0x7c)](); }
void recCOP2_SPEC1() {
    iFlushCall(FLUSH_EVERYTHING);
    xMOV(eax, ptr[&cpuRegs.cycle]);
    xADD(eax, scaleblockcycles_clear());
    xMOV(ptr[&cpuRegs.cycle], eax); // update cycles
    xFastCall((void*)_vu0FinishMicro); recCOP2SPECIAL1t[_Funct_]();
}
void recCOP2_SPEC2() { recCOP2SPECIAL2t[(cpuRegs.code&3)|((cpuRegs.code>>4)&0x7c)](); }