From f816748d7270f1173a6f525ecbe9acc10db1b2cb Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 1 Aug 2011 03:16:42 +0000 Subject: [PATCH] microVU: - Fixed a bug with mVU's ESIN implementation. This finally fixes the flower-petal bug in Radiata Stories (and hopefully the clouds-bug in VP2 but haven't tested). Rama and I have wanted to fix this for a long time, but never knew the problem :p - Improved implementation of some undefined behavior that happens on branches to take into account pipeline stalls. gif / vif: - Added some logging code that parses gif packets. It can be enabled by the PRINT_GIF_PACKET macro in Gif.h - Deleted some old gif code. - On ReadFIFO_VIF1() clear the out-reg to prevent uninitialized results in case GSreadFIFO() doesn't modify it... - Create a fallback for GSreadFIFO2() so that it's always defined (as long as the plugin implements GSreadFIFO()) pcsx2: - Organized the virtual folders in vs2008 a bit... Note: vs2010 and Linux builds need to be modified to add Gif_Logger.cpp and delete GIFpath.cpp git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4835 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/FiFo.cpp | 10 +- pcsx2/GS.cpp | 1 - pcsx2/Gif.h | 1 + pcsx2/Gif_Logger.cpp | 88 ++ pcsx2/Gif_Unit.cpp | 4 +- pcsx2/Gif_Unit.h | 23 +- pcsx2/PluginManager.cpp | 12 +- pcsx2/SaveState.h | 1 - pcsx2/Vif1_Dma.cpp | 44 +- pcsx2/ps2/GIFpath.cpp | 943 ------------- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 1442 ++++++++++---------- pcsx2/x86/microVU_Analyze.inl | 28 +- pcsx2/x86/microVU_Flags.inl | 27 +- pcsx2/x86/microVU_Lower.inl | 57 +- 14 files changed, 926 insertions(+), 1755 deletions(-) create mode 100644 pcsx2/Gif_Logger.cpp delete mode 100644 pcsx2/ps2/GIFpath.cpp diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index d8aaf8bb02..0eb816b7be 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -44,15 +44,13 @@ void __fastcall ReadFIFO_VIF1(mem128_t* out) if (vif1Regs.stat.test(VIF1_STAT_INT | VIF1_STAT_VSS | VIF1_STAT_VIS | VIF1_STAT_VFS) ) 
DevCon.Warning( "Reading from vif1 fifo when stalled" ); + ZeroQWC(out); // Clear first in case no data gets written... pxAssertRel(vif1Regs.stat.FQC != 0, "FQC = 0 on VIF FIFO READ!"); - if (vif1Regs.stat.FDR) - { - if(vif1Regs.stat.FQC > vif1.GSLastDownloadSize) - { + if (vif1Regs.stat.FDR) { + if (vif1Regs.stat.FQC > vif1.GSLastDownloadSize) { DevCon.Warning("Warning! GS Download size < FIFO count!"); } - if (vif1Regs.stat.FQC > 0) - { + if (vif1Regs.stat.FQC > 0) { GetMTGS().WaitGS(); GSreadFIFO((u64*)out); vif1.GSLastDownloadSize--; diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index a4637a7541..5cc3f2ae26 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -444,5 +444,4 @@ void SaveStateBase::gsFreeze() { FreezeMem(PS2MEM_GS, 0x2000); Freeze(gsRegionMode); - gifPathFreeze(); } diff --git a/pcsx2/Gif.h b/pcsx2/Gif.h index c31f219d31..81238a269e 100644 --- a/pcsx2/Gif.h +++ b/pcsx2/Gif.h @@ -17,6 +17,7 @@ #define USE_OLD_GIF 0 #define COPY_GS_PACKET_TO_MTGS 0 +#define PRINT_GIF_PACKET 0 //#define GUNIT_LOG DevCon.WriteLn #define GUNIT_LOG(...) do {} while(0) diff --git a/pcsx2/Gif_Logger.cpp b/pcsx2/Gif_Logger.cpp new file mode 100644 index 0000000000..bf28de5876 --- /dev/null +++ b/pcsx2/Gif_Logger.cpp @@ -0,0 +1,88 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2010 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#include "PrecompiledHeader.h" +#include "Common.h" +#include "Gif.h" +#include "Gif_Unit.h" + +#define GIF_PARSE DevCon.WriteLn + +static const char GifTag_ModeStr[4][16] = { + "Packed", "Reglist", "Image", "Image2" +}; + +static const char GifTag_RegStr[16][16] = { + "PRIM", "RGBA", "STQ", "UV", + "XYZF2", "XYZ2", "TEX0_1", "TEX0_2", + "CLAMP_1", "CLAMP_2", "FOG", "INVALID", + "XYZF3", "XYZ3", "A+D", "NOP" +}; + +void Gif_ParsePacket(u8* data, u32 size, GIF_PATH path) { + Gif_Tag gifTag; + u8* buffer = data; + u32 offset = 0; + GIF_PARSE("Path %d Transfer", path+1); + for(;;) { + if (!gifTag.isValid) { // Need new Gif Tag + if (offset + 16 > size) return; + + gifTag.setTag(&buffer[offset], 1); + + GIF_PARSE("--Gif Tag [mode=%s][pre=%d][prim=%d][nregs=%d][nloop=%d][qwc=%d][EOP=%d]", + GifTag_ModeStr[gifTag.tag.FLG], gifTag.tag.PRE, gifTag.tag.PRIM, + gifTag.nRegs, gifTag.nLoop, gifTag.len/16, gifTag.tag.EOP); + + if (offset + 16 + gifTag.len > size) return; + offset += 16; + } + + switch(gifTag.tag.FLG) { + case GIF_FLG_PACKED: + for(u32 i = 0; i < gifTag.tag.NLOOP; i++) { + for(u32 j = 0; j < gifTag.nRegs; j++) { + if (gifTag.regs[j] == GIF_REG_A_D) { + GIF_PARSE("----[Reg=A+D(0x%x)][nreg=%d][nloop=%d]", + buffer[offset+8], j, i); + } + else { + GIF_PARSE("----[Reg=%s][nreg=%d][nloop=%d]", + GifTag_RegStr[gifTag.regs[j]&0xf], j, i); + } + offset += 16; // 1 QWC + }} + break; + case GIF_FLG_REGLIST: + for(u32 j = 0; j < gifTag.nRegs; j++) { + GIF_PARSE("----[Reg=%s][nreg=%d]", GifTag_RegStr[gifTag.regs[j]&0xf], j); + } + offset += gifTag.len; // Data length + break; + case GIF_FLG_IMAGE: + case GIF_FLG_IMAGE2: + offset += gifTag.len; // Data length + break; + jNO_DEFAULT; + } + + // Reload gif tag next loop + gifTag.isValid = false; + } +} + +void Gif_ParsePacket(GS_Packet& gsPack, GIF_PATH path) { + Gif_ParsePacket(&gifUnit.gifPath[path].buffer[gsPack.offset], gsPack.size, path); +} diff --git a/pcsx2/Gif_Unit.cpp b/pcsx2/Gif_Unit.cpp index 
eed19f1125..206043a0ff 100644 --- a/pcsx2/Gif_Unit.cpp +++ b/pcsx2/Gif_Unit.cpp @@ -26,8 +26,8 @@ Gif_Unit gifUnit; bool Gif_HandlerAD(u8* pMem) { u32 reg = pMem[8]; u32* data = (u32*)pMem; - if (reg == 0x50) vif1.BITBLTBUF._u64 = *(u64*)pMem; - elif (reg == 0x52) vif1.TRXREG._u64 = *(u64*)pMem; + if (reg == 0x50) vif1.BITBLTBUF._u64 = *(u64*)pMem; + elif (reg == 0x52) vif1.TRXREG._u64 = *(u64*)pMem; elif (reg == 0x53) { // TRXDIR if ((pMem[0] & 3) == 1) { // local -> host u8 bpp = 32; // Onimusha does TRXDIR without BLTDIVIDE first, assume 32bit diff --git a/pcsx2/Gif_Unit.h b/pcsx2/Gif_Unit.h index 346ffc46cc..9649bf1f9e 100644 --- a/pcsx2/Gif_Unit.h +++ b/pcsx2/Gif_Unit.h @@ -1,8 +1,27 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2010 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ #pragma once #include "System/SysThreads.h" +struct GS_Packet; +extern void Gif_MTGS_Wait(); extern void Gif_FinishIRQ(); extern bool Gif_HandlerAD(u8* pMem); +extern void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path); +extern void Gif_ParsePacket(u8* data, u32 size, GIF_PATH path); +extern void Gif_ParsePacket(GS_Packet& gsPack, GIF_PATH path); struct Gif_Tag { struct HW_Gif_Tag { @@ -152,7 +171,6 @@ struct Gif_Path { //pxAssertDev(AtomicExchangeAdd(readAmount, 0) != 0, "Gif Path Buffer Overflow!"); DevCon.WriteLn(Color_Red, "Gif Path[%d] - MTGS Wait! 
[r=0x%x]", idx+1, AtomicExchangeAdd(readAmount, 0)); - extern void Gif_MTGS_Wait(); Gif_MTGS_Wait(); } @@ -289,8 +307,8 @@ struct Gif_Unit { // Adds a finished GS Packet to the MTGS ring buffer __fi void AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path) { - extern void Gif_AddCompletedGSPacket(GS_Packet& gsPack, GIF_PATH path); Gif_AddCompletedGSPacket(gsPack, path); + if (PRINT_GIF_PACKET) Gif_ParsePacket(gsPack, path); } // Returns GS Packet Size in bytes @@ -406,6 +424,7 @@ struct Gif_Unit { continue; } } + //FlushToMTGS(); //DevCon.WriteLn("Incomplete GS Packet for path %d, size=%d", stat.APATH, gsPack.size); break; // Not finished with GS packet } diff --git a/pcsx2/PluginManager.cpp b/pcsx2/PluginManager.cpp index f780e5c78f..7ea2ce9793 100644 --- a/pcsx2/PluginManager.cpp +++ b/pcsx2/PluginManager.cpp @@ -203,7 +203,7 @@ void CALLBACK GS_getTitleInfo2( char* dest, size_t length ) // in the BIOS when it starts an XGKICK prior to having an EOP written to VU1 memory). The new // MTGS wraps data around the end of the MTGS buffer, so it often splits PATH1 data into two // transfers now. -static void CALLBACK GS_gifTransferLegacy( const u32* src, u32 data ) +static void CALLBACK GS_Legacy_gifTransfer( const u32* src, u32 data ) { static __aligned16 u128 path1queue[0x400]; static uint path1size = 0; @@ -247,11 +247,15 @@ static void CALLBACK GS_gifTransferLegacy( const u32* src, u32 data ) // callback, which falls back to this function if its an old plugin. // Since GSgifTransfer2 is the least hacky old call-back, and MTGS will // just be using a single gif path, we'll just solely use path 2... 
-static void CALLBACK GS_gifTransferLegacy(const u32* src, u32 data) { +static void CALLBACK GS_Legacy_gifTransfer(const u32* src, u32 data) { GSgifTransfer2((u32*)src, data); } #endif +static void CALLBACK GS_Legacy_GSreadFIFO2(u64* pMem, int qwc) { + for (; qwc > 0; --qwc, pMem += 2) GSreadFIFO(pMem); /* each GSreadFIFO() call fills one qword (2 x u64), so step to the next slot instead of overwriting the first; qwc <= 0 reads nothing */ +} + // PAD _PADinit PADinit; _PADopen PADopen; @@ -371,10 +375,10 @@ static const LegacyApi_ReqMethod s_MethMessReq_GS[] = { { "GSopen", (vMeth**)&GSopen, NULL }, { "GSvsync", (vMeth**)&GSvsync, NULL }, - { "GSgifTransfer", (vMeth**)&GSgifTransfer, (vMeth*)GS_gifTransferLegacy }, + { "GSgifTransfer", (vMeth**)&GSgifTransfer, (vMeth*)GS_Legacy_gifTransfer }, { "GSgifTransfer2", (vMeth**)&GSgifTransfer2, NULL }, { "GSgifTransfer3", (vMeth**)&GSgifTransfer3, NULL }, - { "GSreadFIFO2", (vMeth**)&GSreadFIFO2, NULL }, + { "GSreadFIFO2", (vMeth**)&GSreadFIFO2, (vMeth*)GS_Legacy_GSreadFIFO2 }, { "GSmakeSnapshot", (vMeth**)&GSmakeSnapshot, (vMeth*)GS_makeSnapshot }, { "GSirqCallback", (vMeth**)&GSirqCallback, (vMeth*)GS_irqCallback }, diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index 6dd129442d..df3fad3fbf 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -206,7 +206,6 @@ protected: void gifFreeze(); void gifDmaFreeze(); void gifPathFreeze(u32 path); // called by gifFreeze() - void gifPathFreeze(); // called by gsFreeze() void sprFreeze(); diff --git a/pcsx2/Vif1_Dma.cpp b/pcsx2/Vif1_Dma.cpp index fee37b55b6..209367bbee 100644 --- a/pcsx2/Vif1_Dma.cpp +++ b/pcsx2/Vif1_Dma.cpp @@ -51,29 +51,27 @@ __fi void vif1FLUSH() void vif1TransferToMemory() { - u32 size; u128* pMem = (u128*)dmaGetAddr(vif1ch.madr, false); // VIF from gsMemory - if (pMem == NULL) //Is vif0ptag empty? - { + if (pMem == NULL) { // Is vif0ptag empty? Console.WriteLn("Vif1 Tag BUSERR"); - dmacRegs.stat.BEIS = true; //Bus Error + dmacRegs.stat.BEIS = true; // Bus Error vif1Regs.stat.FQC = 0; vif1ch.qwc = 0; vif1.done = true; CPU_INT(DMAC_VIF1, 0); - return; //An error has occurred. 
+ return; // An error has occurred. } // MTGS concerns: The MTGS is inherently disagreeable with the idea of downloading // stuff from the GS. The *only* way to handle this case safely is to flush the GS // completely and execute the transfer there-after. //Console.Warning("Real QWC %x", vif1ch.qwc); - size = min((u32)vif1ch.qwc, vif1.GSLastDownloadSize); - const u128* pMemEnd = pMem + vif1.GSLastDownloadSize; - + const u32 size = min(vif1.GSLastDownloadSize, (u32)vif1ch.qwc); + const u128* pMemEnd = vif1ch.qwc + pMem; /* end of the qwc-sized DMA destination: bounding by GSLastDownloadSize zero-filled past the buffer whenever the GS held more data than this transfer, and never cleared the shortfall the warning below describes */ + if (size) { // Checking if any crazy game does a partial // gs primitive and then does a gs download... @@ -85,42 +83,26 @@ void vif1TransferToMemory() pxAssert(p3.isDone() || !p3.gifTag.isValid); } - if (GSreadFIFO2 == NULL) - { - for ( ; size > 0; --size) - { - GetMTGS().WaitGS(); - GSreadFIFO((u64*)pMem); - ++pMem; - } - } - else - { - GetMTGS().WaitGS(); - GSreadFIFO2((u64*)pMem, size); - pMem += size; - } + GetMTGS().WaitGS(); + GSreadFIFO2((u64*)pMem, size); + pMem += size; - if(pMem < pMemEnd) - { + if(pMem < pMemEnd) { DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space"); __m128 zeroreg = _mm_setzero_ps(); do { _mm_store_ps((float*)pMem, zeroreg); - ++pMem; - } while (pMem < pMemEnd); + } while (++pMem < pMemEnd); } g_vifCycles += vif1ch.qwc * 2; vif1ch.madr += vif1ch.qwc * 16; // mgs3 scene changes - if(vif1.GSLastDownloadSize >= vif1ch.qwc) - { + if (vif1.GSLastDownloadSize >= vif1ch.qwc) { vif1.GSLastDownloadSize -= vif1ch.qwc; vif1Regs.stat.FQC = min((u32)16, vif1.GSLastDownloadSize); } - else - { + else { vif1Regs.stat.FQC = 0; vif1.GSLastDownloadSize = 0; } diff --git a/pcsx2/ps2/GIFpath.cpp b/pcsx2/ps2/GIFpath.cpp deleted file mode 100644 index 1b5500049c..0000000000 --- a/pcsx2/ps2/GIFpath.cpp +++ /dev/null @@ -1,943 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser 
General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "Common.h" -#include "GS.h" -#include "Gif.h" -#include "Vif_Dma.h" -#include "Vif.h" -#include - -#if USE_OLD_GIF == 1 // d -// -------------------------------------------------------------------------------------- -// GIFpath -- the GIFtag Parser -// -------------------------------------------------------------------------------------- - -// GIFTAG -// Members of this structure are in CAPS to help visually denote that they are representative -// of actual hw register states of the GIF, unlike the internal tracking vars in GIFPath, which -// are modified during the GIFtag unpacking process. -struct GIFTAG -{ - u16 NLOOP : 15; - u16 EOP : 1; - - // Note that contents of the Dummy bits on real hardware is likely used to maintain state - // information regarding tag processing (namely nllop and curreg info, so to resume partial - // transfers later). 
- u16 _dummy0 : 16; - u32 _dummy1 : 14; - - u32 PRE : 1; - u32 PRIM : 11; - u32 FLG : 2; - u32 NREG : 4; - u32 REGS[2]; - - GIFTAG() {} - - wxString DumpRegsToString() const; - wxString ToString() const; -}; - -wxString GIFTAG::DumpRegsToString() const -{ - static const char* PackedModeRegsLabel[] = - { - "PRIM", "RGBA", "STQ", "UV", - "XYZF2", "XYZ2", "TEX0_1", "TEX0_2", - "CLAMP_1", "CLAMP_2", "FOG", "Unknown", - "XYZF3", "XYZ3", "A_D", "NOP" - }; - - u32 tempreg = REGS[0]; - uint numregs = ((NREG-1)&0xf) + 1; - - FastFormatUnicode result; - result.Write("NREG=0x%02X (", NREG); - - for (u32 i = 0; i < numregs; i++) { - if (i == 8) tempreg = REGS[1]; - if (i > 0) result.Write(" "); - result.Write(PackedModeRegsLabel[tempreg & 0xf]); - tempreg >>= 4; - } - - result.Write(")"); - return result; -} - -wxString GIFTAG::ToString() const -{ - static const char* GifTagModeLabel[] = - { - "Packed", "RegList", "Image", "Image2" - }; - - FastFormatUnicode result; - result.Write("NLOOP=0x%04X, EOP=%u, PRE=%u, PRIM=0x%03X, MODE=%s", - NLOOP, EOP, PRE, PRIM, GifTagModeLabel[FLG]); - - return result; -} - - -// -------------------------------------------------------------------------------------- -// GIFPath -- PS2 GIFtag info (one for each path). -// -------------------------------------------------------------------------------------- -// fixme: The real PS2 has a single internal PATH and 3 logical sources, not 3 entirely -// separate paths. But for that to work properly we need also interlocked path sources. -// That is, when the GIF selects a source, it sticks to that source until an EOP. Currently -// this is not emulated! - -struct GIFPath -{ - const GIFTAG tag; // A copy of the "original" tag -- modification allowed only by SetTag(), so let's make it const. - u8 regs[16]; // positioned after tag ensures 16-bit aligned (in case we SSE optimize later) - - u32 nloop; // local copy nloop counts toward zero, and leaves the tag copy unmodified. 
- u32 curreg; // reg we left of on (for traversing through loops) - u32 numregs; // number of regs (when NREG is 0, numregs is 16) - u32 DetectE; - - GIFPath(); - - void Reset(); - void PrepPackedRegs(); - bool StepReg(); - u8 GetReg(); - bool IsActive() const; - - template< bool Aligned > - void SetTag(const void* mem); - - template< GIF_PATH pathidx, bool Aligned > - int CopyTag(const u128* pMem, u32 size); - - int ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size); -}; - -typedef void (__fastcall *GIFRegHandler)(const u32* data); - -struct GifPathStruct -{ - const GIFRegHandler Handlers[0x100-0x60]; // handlers for 0x60->0x100 - GIFPath path[3]; - - __fi GIFPath& operator[]( int idx ) { return path[idx]; } -}; - - -// -------------------------------------------------------------------------------------- -// SIGNAL / FINISH / LABEL -// -------------------------------------------------------------------------------------- - -bool SIGNAL_IMR_Pending = false; -u32 SIGNAL_Data_Pending[2]; - - -// SIGNAL : This register is a double-throw. If the SIGNAL bit in CSR is clear, set the CSR -// and raise a gsIrq. If CSR is already *set*, then do not raise a gsIrq, and ignore all -// subsequent drawing operations and writes to general purpose registers to the GS. (note: -// I'm pretty sure this includes direct GS and GSreg accesses, as well as those coming -// through the GIFpath -- but that behavior isn't confirmed yet). Privileged writes are -// still active. -// -// Ignorance continues until the SIGNAL bit in CSR is manually cleared by the EE. And here's -// the tricky part: the interrupt from the second SIGNAL is still pending, and should be -// raised once the EE has reset the *IMR* mask for SIGNAL -- meaning setting the bit to 1 -// (disabled/masked) and then back to 0 (enabled/unmasked). Until the *IMR* is cleared, the -// SIGNAL is still in the second throw stage, and will freeze the GS upon being written. 
-// -static void __fastcall RegHandlerSIGNAL(const u32* data) -{ - // HACK: - // Soul Calibur 3 seems to be doing SIGNALs on PATH2 and PATH3 simultaneously, and isn't - // too happy with the results (dies on bootup). It properly clears the SIGNAL interrupt - // but seems to get stuck on a VBLANK OVERLAP loop. Fixing SIGNAL so that it properly - // stalls the GIF might fix it. Investigating the game's internals more deeply may also - // be revealing. --air - - if (CSRreg.SIGNAL) - { - // Time to ignore all subsequent drawing operations. (which is not yet supported) - if (!SIGNAL_IMR_Pending) - { - //DevCon.WriteLn( Color_StrongOrange, "GS SIGNAL double throw encountered!" ); - SIGNAL_IMR_Pending = true; - SIGNAL_Data_Pending[0] = data[0]; - SIGNAL_Data_Pending[1] = data[1]; - - // [TODO] (SIGNAL) : Disable GIFpath DMAs here! - // All PATHs and DMAs should be disabled until the CSR is written and the - // SIGNAL bit cleared. - } - } - else - { - GIF_LOG("GS SIGNAL data=%x_%x IMR=%x CSRr=%x",data[0], data[1], GSIMR, GSCSRr); - GSSIGLBLID.SIGID = (GSSIGLBLID.SIGID&~data[1])|(data[0]&data[1]); - - if (!(GSIMR&0x100)) - gsIrq(); - - CSRreg.SIGNAL = true; - } -} - -// FINISH : Enables end-of-draw signaling. When FINISH is written it tells the GIF to -// raise a gsIrq and set the FINISH bit of CSR when the *current drawing operation* is -// finished. Translation: Only after all three logical GIFpaths are in EOP status. -// -// This feature can be used for both reversing the GS transfer mode (downloading post- -// processing effects to the EE), and more importantly for *DMA synch* between the -// three logical GIFpaths. 
-// -static void __fastcall RegHandlerFINISH(const u32* data) -{ - GifTagLog("GIFpath FINISH data=%x_%x CSRr=%x", data[0], data[1], GSCSRr); - - // The FINISH bit is set here, and then it will be cleared when all three - // logical GIFpaths finish their packets (EOPs) At that time (found below - // in the GIFpath_Parser), IMR is tested and a gsIrq() raised if needed. - - CSRreg.FINISH = true; -} - -static void __fastcall RegHandlerLABEL(const u32* data) -{ - GifTagLog( "GIFpath LABEL" ); - GSSIGLBLID.LBLID = (GSSIGLBLID.LBLID&~data[1])|(data[0]&data[1]); -} - -static void __fastcall RegHandlerUNMAPPED(const u32* data) -{ - const int regidx = ((u8*)data)[8]; - - // Known "unknowns": - // It's possible that anything above 0x63 should just be silently ignored, but in the - // offhand chance not, I'm documenting known cases of unknown register use here. - // - // 0x7F --> - // the bios likes to write to 0x7f using an EOP giftag with NLOOP set to 4. - // Not sure what it's trying to accomplish exactly. Ignoring seems to work fine, - // and is probably the intended behavior (it's likely meant to be a NOP). - // - // 0xEE --> - // .hack Infection [PAL confirmed, NTSC unknown] uses 0xee when you zoom the camera. - // The use hasn't been researched yet so parameters are unknown. Everything seems - // to work fine as usual -- The 0xEE address in common programming terms is typically - // left over uninitialized data, and this might be a case of that, which is to be - // silently ignored. - // - // Guitar Hero 3+ : Massive spamming when using superVU (along with several VIF errors) - // Using microVU avoids the GIFtag errors, so probably just one of sVU's hacks conflicting - // with one of VIF's hacks, and causing corrupted packet data. 
- - if( regidx != 0x7f /*&& regidx != 0xee*/ ) - DevCon.Warning( "Ignoring Unmapped GIFtag Register, Index = %02x", regidx ); -} - -#define INSERT_UNMAPPED_4 RegHandlerUNMAPPED, RegHandlerUNMAPPED, RegHandlerUNMAPPED, RegHandlerUNMAPPED, -#define INSERT_UNMAPPED_16 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 -#define INSERT_UNMAPPED_64 INSERT_UNMAPPED_16 INSERT_UNMAPPED_16 INSERT_UNMAPPED_16 INSERT_UNMAPPED_16 - -static __aligned16 GifPathStruct s_gifPath = -{ - RegHandlerSIGNAL, RegHandlerFINISH, RegHandlerLABEL, RegHandlerUNMAPPED, - - // Rest are mapped to Unmapped - INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 INSERT_UNMAPPED_4 - INSERT_UNMAPPED_64 INSERT_UNMAPPED_64 INSERT_UNMAPPED_16 -}; - -// -------------------------------------------------------------------------------------- -// GIFPath Method Implementations -// -------------------------------------------------------------------------------------- - -GIFPath::GIFPath() : tag() -{ - Reset(); -} - -__fi void GIFPath::Reset() -{ - memzero(*this); - const_cast(tag).EOP = 1; -} - -__fi bool GIFPath::StepReg() -{ - if (++curreg >= numregs) { - curreg = 0; - if (--nloop == 0) { - return false; - } - } - return true; -} - -__fi u8 GIFPath::GetReg() { return regs[curreg]; } - -// Unpack the registers - registers are stored as a sequence of 4 bit values in the -// upper 64 bits of the GIFTAG. That sucks for us when handling partialized GIF packets -// coming in from paths 2 and 3, so we unpack them into an 8 bit array here. -// -__fi void GIFPath::PrepPackedRegs() -{ - // Only unpack registers if we're starting a new pack. Otherwise the unpacked - // array should have already been initialized by a previous partial transfer. 
- - if (curreg != 0) return; - DetectE = 0; - u32 tempreg = tag.REGS[0]; - numregs = ((tag.NREG-1)&0xf) + 1; - - for (u32 i = 0; i < numregs; i++) { - if (i == 8) tempreg = tag.REGS[1]; - regs[i] = tempreg & 0xf; - if(regs[i] == 0xe) DetectE++; - tempreg >>= 4; - } -} - - -template< bool Aligned > -__fi void GIFPath::SetTag(const void* mem) -{ - _mm_store_ps( (float*)&tag, Aligned ? _mm_load_ps((const float*)mem) : _mm_loadu_ps((const float*)mem) ); - - nloop = tag.NLOOP; - curreg = 0; -} - -__fi bool GIFPath::IsActive() const -{ - return (nloop != 0) || !tag.EOP; -} - -static __fi void gsHandler(const u8* pMem) -{ - const int reg = pMem[8]; - - if (reg == 0x50) - { - vif1.BITBLTBUF._u64 = *(u64*)pMem; - } - else if (reg == 0x52) - { - vif1.TRXREG._u64 = *(u64*)pMem; - } - else if (reg == 0x53) - { - // local -> host - if ((pMem[0] & 3) == 1) - { - //Onimusha does TRXREG without BLTDIVIDE first, so we "assume" 32bit for this equation, probably isnt important. - // ^ WTF, seriously? This is really important (pseudonym) - u8 bpp = 32; - - switch(vif1.BITBLTBUF.SPSM & 7) - { - case 0: - bpp = 32; - break; - case 1: - bpp = 24; - break; - case 2: - bpp = 16; - break; - case 3: - bpp = 8; - break; - // 4 is 4 bit but this is forbidden - default: - Console.Error("Illegal format for GS upload: SPSM=0%02o", vif1.BITBLTBUF.SPSM); - } - - VIF_LOG("GS Download %dx%d SPSM=%x bpp=%d", vif1.TRXREG.RRW, vif1.TRXREG.RRH, vif1.BITBLTBUF.SPSM, bpp); - - // qwords, rounded down; any extra bits are lost - // games must take care to ensure transfer rectangles are exact multiples of a qword - vif1.GSLastDownloadSize = vif1.TRXREG.RRW * vif1.TRXREG.RRH * bpp >> 7; - //DevCon.Warning("GS download in progress"); - gifRegs.stat.OPH = true; - } - } - if (reg >= 0x60) - { - // Question: What happens if an app writes to uncharted register space on real PS2 - // hardware (handler 0x63 and higher)? Probably a silent ignorance, but not tested - // so just guessing... 
--air - - s_gifPath.Handlers[reg-0x60]((const u32*)pMem); - } -} - -#define incTag(y) do { \ - pMem += (y*16); \ - size -= (y); \ -} while(false) - -#define aMin(x, y) std::min(x, y) - -// Parameters: -// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the -// path does not terminate (EOP) within the specified size, it is assumed that the path must -// loop around to the start of VU memory and continue processing. -__fi int GIFPath::ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size) -{ - u32 startSize = size; // Start Size - - while (size > 0) { - if (!nloop) { - - SetTag(pMem); - incTag(1); - } - else - { - switch(tag.FLG) { - case GIF_FLG_PACKED: - { - GifTagLog("Packed Mode"); - numregs = ((tag.NREG-1)&0xf) + 1; - - // Note: curreg is *usually* zero here, but can be non-zero if a previous fragment was - // handled via this optimized copy code below. - - const u32 listlen = (nloop * numregs) - curreg; // the total length of this packed register list (in QWC) - u32 len; - - if(size < listlen) - { - len = size; - - // We need to calculate both the number of full iterations of regs copied (nloops), - // and any remaining registers not copied by this fragment. A div/mod pair should - // hopefully be optimized by the compiler into a single x86 div. :) - - const int nloops_copied = len / numregs; - const int regs_not_copied = len % numregs; - - // Make sure to add regs_not_copied to curreg, to handle cases of multiple partial fragments. - // (example: 3 fragments each of only 2 regs, then curreg should be 0, 2, and then 4 after - // each call to GIFPath_Parse; with no change to NLOOP). Because of this we also need to - // check for cases where curreg wraps past an nloop. 
- - nloop -= nloops_copied; - curreg += regs_not_copied; - if(curreg >= numregs) - { - --nloop; - curreg -= numregs; - } - } - else - { - len = listlen; - curreg = 0; - nloop = 0; - } - incTag(len); - } - break; - case GIF_FLG_REGLIST: - { - GifTagLog("Reglist Mode EOP %x", tag.EOP); - - // In reglist mode, the GIF packs 2 registers into each QWC. The nloop however - // can be an odd number, in which case the upper half of the final QWC is ignored (skipped). - - numregs = ((tag.NREG-1)&0xf) + 1; - const u32 total_reglen = (nloop * numregs) - curreg; // total 'expected length' of this packed register list (in registers) - const u32 total_listlen = (total_reglen+1) / 2; // total 'expected length' of the register list, in QWC! (+1 so to round it up) - - u32 len; - - if(size < total_listlen) - { - //Console.Warning("GIF path %d Fragmented REGLIST! Please report if you experience problems", pathidx + 1); - - len = size; - const u32 reglen = len * 2; - - const int nloops_copied = reglen / numregs; - const int regs_not_copied = reglen % numregs; - - //DevCon.Warning("Hit it path %d", pathidx + 1); - curreg += regs_not_copied; - nloop -= nloops_copied; - - if(curreg >= numregs) - { - --nloop; - curreg -= numregs; - } - } - else - { - len = total_listlen; - curreg = 0; - nloop = 0; - } - - incTag(len); - //if(curreg != 0 || (len % numregs) > 0) DevCon.Warning("Oops c %x n %x m %x r %x", curreg, nloop, (len % numregs), numregs); - } - break; - case GIF_FLG_IMAGE: - case GIF_FLG_IMAGE2: - { - GifTagLog("IMAGE Mode"); - int len = aMin(size, nloop); - incTag(len); - nloop -= len; - } - break; - } - } - if(pathidx == GIF_PATH_1) - { - if(size == 0 && (!tag.EOP || nloop > 0)) - { - if(startSize < 0x400) - { - size = 0x400 - startSize; - startSize = 0x400; - pMem -= 0x4000; - } - else - { - // Note: The BIOS does an XGKICK on the VU1 and lets it DMA to the GS without an EOP - // (seemingly to loop forever), only to write an EOP later on. 
No other game is known to - // do anything of the sort. - // So lets just cap the DMA at 16k, and force it to "look" like it's terminated for now. - // (note: truly accurate emulation would mean having the VU1's XGKICK break execution, - // split time to EE and other processors, and then resume the kick's DMA later. - // ... yea, not happening for a while. ;) -- air - - Console.Warning("GIFTAG warning, size exceeded VU memory size %x", startSize); - nloop = 0; - const_cast(tag).EOP = 1; - } - } - } - if (tag.EOP && !nloop) break; - } - - size = (startSize - size); - - - return size; -} - -#define copyTag() do { \ - _mm_store_ps( (float*)&RingBuffer.m_Ring[ringpos], Aligned ? _mm_load_ps((float*)pMem128) : _mm_loadu_ps((float*)pMem128)); \ - ++pMem128; --size; \ - ringpos = (ringpos+1)&RingBufferMask; \ -} while(false) - -// Parameters: -// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the -// path does not terminate (EOP) within the specified size, it is assumed that the path must -// loop around to the start of VU memory and continue processing. 
-template< GIF_PATH pathidx, bool Aligned > -__fi int GIFPath::CopyTag(const u128* pMem128, u32 size) -{ - uint& ringpos = GetMTGS().m_packet_writepos; - const uint original_ringpos = ringpos; - - - - u32 startSize = size; // Start Size - - while (size > 0) { - if (!nloop) { - - SetTag((u8*)pMem128); - copyTag(); - - GifTagLog("\tSetTag: %ls Path %d", tag.ToString().c_str(), pathidx + 1); - - if(nloop > 0) - { - switch(pathidx) - { - case GIF_PATH_1: - if(tag.FLG & 2)GSTransferStatus.PTH1 = IMAGE_MODE; - else GSTransferStatus.PTH1 = TRANSFER_MODE; - break; - case GIF_PATH_2: - if(tag.FLG & 2)GSTransferStatus.PTH2 = IMAGE_MODE; - else GSTransferStatus.PTH2 = TRANSFER_MODE; - break; - case GIF_PATH_3: - if(tag.FLG & 2) GSTransferStatus.PTH3 = IMAGE_MODE; - else GSTransferStatus.PTH3 = TRANSFER_MODE; - break; - } - gifRegs.stat.OPH = true; - gifRegs.stat.APATH = pathidx + 1; - } - - - - if(nloop == 0 && tag.EOP) - { - break; - } - - } - else - { - switch(pathidx) - { - case GIF_PATH_1: - if(tag.FLG & 2)GSTransferStatus.PTH1 = IMAGE_MODE; - else GSTransferStatus.PTH1 = TRANSFER_MODE; - break; - case GIF_PATH_2: - if(tag.FLG & 2)GSTransferStatus.PTH2 = IMAGE_MODE; - else GSTransferStatus.PTH2 = TRANSFER_MODE; - break; - case GIF_PATH_3: - if(tag.FLG & 2) GSTransferStatus.PTH3 = IMAGE_MODE; - else GSTransferStatus.PTH3 = TRANSFER_MODE; - - break; - } - gifRegs.stat.APATH = pathidx + 1; - gifRegs.stat.OPH = true; - - switch(tag.FLG) { - case GIF_FLG_PACKED: - GifTagLog("Packed Mode EOP %x : %ls", tag.EOP, tag.DumpRegsToString().c_str()); - PrepPackedRegs(); - - if(DetectE > 0) - { - do { - if (GetReg() == 0xe) { - gsHandler((u8*)pMem128); - } - copyTag(); - } while(StepReg() && size > 0 && SIGNAL_IMR_Pending == false); - } - else - { - //DevCon.WriteLn(Color_Orange, "No E detected on Path%d: nloop=%x, numregs=%x, curreg=%x, size=%x", pathidx + 1, nloop, numregs, curreg, size); - - // Note: curreg is *usually* zero here, but can be non-zero if a previous fragment was - // 
handled via this optimized copy code below. - - const u32 listlen = (nloop * numregs) - curreg; // the total length of this packed register list (in QWC) - u32 len; - - if(size < listlen) - { - len = size; - - // We need to calculate both the number of full iterations of regs copied (nloops), - // and any remaining registers not copied by this fragment. A div/mod pair should - // hopefully be optimized by the compiler into a single x86 div. :) - - const int nloops_copied = len / numregs; - const int regs_not_copied = len % numregs; - - // Make sure to add regs_not_copied to curreg, to handle cases of multiple partial fragments. - // (example: 3 fragments each of only 2 regs, then curreg should be 0, 2, and then 4 after - // each call to GIFPath_Parse; with no change to NLOOP). Because of this we also need to - // check for cases where curreg wraps past an nloop. - - nloop -= nloops_copied; - curreg += regs_not_copied; - if(curreg >= numregs) - { - --nloop; - curreg -= numregs; - } - } - else - { - len = listlen; - curreg = 0; - nloop = 0; - } - - MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len ); - pMem128 += len; - size -= len; - } - break; - case GIF_FLG_REGLIST: - { - GifTagLog("Reglist Mode EOP %x", tag.EOP); - - // In reglist mode, the GIF packs 2 registers into each QWC. The nloop however - // can be an odd number, in which case the upper half of the final QWC is ignored (skipped). - - numregs = ((tag.NREG-1)&0xf) + 1; - const u32 total_reglen = (nloop * numregs) - curreg; // total 'expected length' of this packed register list (in registers) - const u32 total_listlen = (total_reglen+1) / 2; // total 'expected length' of the register list, in QWC! (+1 so to round it up) - - u32 len; - - if(size < total_listlen) - { - //Console.Warning("GIF path %d Fragmented REGLIST! 
Please report if you experience problems", pathidx + 1); - - len = size; - const u32 reglen = len * 2; - - const int nloops_copied = reglen / numregs; - const int regs_not_copied = reglen % numregs; - - //DevCon.Warning("Hit it path %d", pathidx + 1); - curreg += regs_not_copied; - nloop -= nloops_copied; - - if(curreg >= numregs) - { - --nloop; - curreg -= numregs; - } - } - else - { - len = total_listlen; - curreg = 0; - nloop = 0; - } - - MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len ); - pMem128 += len; - size -= len; - } - break; - case GIF_FLG_IMAGE: - case GIF_FLG_IMAGE2: - { - GifTagLog("IMAGE Mode EOP %x", tag.EOP); - if(pathidx == GIF_PATH_3 && gifRegs.stat.IMT) - { - //Size or Nloop can be smaller, before we enforce a maximum packet size of 8, we need to know which is true. - int len = aMin(size, nloop); - len = aMin(len, 8); - MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len ); - - pMem128 += len; - size -= len; - nloop -= len; - break; - } - else - { - - int len = aMin(size, nloop); - MemCopy_WrappedDest( pMem128, RingBuffer.m_Ring, ringpos, RingBufferSize, len ); - - pMem128 += len; - size -= len; - nloop -= len; - } - } - break; - } - } - - if(pathidx == GIF_PATH_1) - { - if(size == 0 && (!tag.EOP || nloop > 0)) - { - if(startSize < 0x3ff) - { - size = 0x3ff - startSize; - startSize = 0x3ff; - pMem128 -= 0x400; - } - else - { - // Note: The BIOS does an XGKICK on the VU1 and lets it DMA to the GS without an EOP - // (seemingly to loop forever), only to write an EOP later on. No other game is known to - // do anything of the sort. - // So lets just cap the DMA at 16k, and force it to "look" like it's terminated for now. - // (note: truly accurate emulation would mean having the VU1's XGKICK break execution, - // split time to EE and other processors, and then resume the kick's DMA later. - // ... yea, not happening for a while. 
;) -- air - - Console.Warning("GIFTAG warning, size exceeded VU memory size %x", startSize); - nloop = 0; - const_cast(tag).EOP = 1; - - // Don't send the packet to the GS -- its incomplete and might cause the GS plugin - // to get confused and die. >_< - - ringpos = original_ringpos; - } - } - } - if (tag.EOP && !nloop) break; - - if(SIGNAL_IMR_Pending == true) - { - //DevCon.Warning("Path %x", pathidx + 1); - break; - } - } - - size = (startSize - size); - - if (tag.EOP && nloop == 0) { - - /*if(gifRegs.stat.DIR == 0)gifRegs.stat.OPH = false; - gifRegs.stat.APATH = GIF_APATH_IDLE;*/ - switch(pathidx) - { - case GIF_PATH_1: - GSTransferStatus.PTH1 = STOPPED_MODE; - break; - case GIF_PATH_2: - GSTransferStatus.PTH2 = PENDINGSTOP_MODE; - break; - case GIF_PATH_3: - //For huge chunks we may have delay problems, so we need to stall it till the interrupt, else we get desync (Lemmings) - GSTransferStatus.PTH3 = PENDINGSTOP_MODE; - MSKPATH3_LOG("Path3 Finishing GIFTag packet"); - break; - } - - if (CSRreg.FINISH) - { - // IMPORTANT: only signal FINISH if ALL THREE paths are stopped (nloop is zero and EOP is set) - // FINISH is *not* a per-path register, and it seems to pretty clearly indicate that all active - // drawing *and* image transfer actions must be finished before the IRQ raises. - - if(gifRegs.stat.P1Q || gifRegs.stat.P2Q || gifRegs.stat.P3Q) - { - //GH3 and possibly others have path data queued waiting for another path to finish! we need to check they are done too - //DevCon.Warning("Early FINISH signal! P1 %x P2 %x P3 %x", gifRegs.stat.P1Q, gifRegs.stat.P2Q, gifRegs.stat.P3Q); - } - else if (!(GSIMR&0x200) && !s_gifPath.path[0].IsActive() && !s_gifPath.path[1].IsActive() && !s_gifPath.path[2].IsActive()) - { - gsIrq(); - } - } - } - else if( nloop == 0) - { - //Need to set GIF as WAITING, sometimes it can get stuck in a bit of a loop if other paths think it's still doing REGLIST for example. 
- //Do NOT use IDLE mode here, it will freak Path3 masking out if it gets used. - switch(pathidx) - { - case GIF_PATH_1: - GSTransferStatus.PTH1 = WAITING_MODE; - break; - case GIF_PATH_2: - GSTransferStatus.PTH2 = WAITING_MODE; - break; - case GIF_PATH_3: - //Required, if GIF_FIFO writes NOP to GIF Tag it can leave it set waiting which causes VIF FlushA to fail - if(GSTransferStatus.PTH3 < PENDINGSTOP_MODE) GSTransferStatus.PTH3 = WAITING_MODE; - break; - } - } - - if(pathidx == 2) - { - //if(nloop <= 16 && GSTransferStatus.PTH3 == IMAGE_MODE)GSTransferStatus.PTH3 = PENDINGIMAGE_MODE; - if (gifch.chcr.STR) { //Make sure we are really doing a DMA and not using FIFO - //GIF_LOG("Path3 end EOP %x NLOOP %x Status %x", tag.EOP, nloop, GSTransferStatus.PTH3); - gifch.madr += size * 16; - gifch.qwc -= size; - hwDmacSrcTadrInc(gifch); - } - } - - return size; -} - -// Parameters: -// size - max size of incoming data stream, in qwc (simd128). If the path is PATH1, and the -// path does not terminate (EOP) within the specified size, it is assumed that the path must -// loop around to the start of VU memory and continue processing. 
-__fi int GIFPath_CopyTag(GIF_PATH pathidx, const u128* pMem, u32 size) -{ - switch( pathidx ) - { - case GIF_PATH_1: - pxAssertMsg(!s_gifPath[GIF_PATH_2].IsActive(), "GIFpath conflict: Attempted to start PATH1 while PATH2 is already active."); - pxAssertMsg(!s_gifPath[GIF_PATH_3].IsActive() || (GSTransferStatus.PTH3 == IMAGE_MODE), "GIFpath conflict: Attempted to start PATH1 while PATH3 is already active."); - return s_gifPath[GIF_PATH_1].CopyTag(pMem, size); - case GIF_PATH_2: - pxAssertMsg(!s_gifPath[GIF_PATH_1].IsActive(), "GIFpath conflict: Attempted to start PATH2 while PATH1 is already active."); - pxAssertMsg(!s_gifPath[GIF_PATH_3].IsActive() || (GSTransferStatus.PTH3 == IMAGE_MODE), "GIFpath conflict: Attempted to start PATH2 while PATH3 is already active."); - return s_gifPath[GIF_PATH_2].CopyTag(pMem, size); - case GIF_PATH_3: - pxAssertMsg(!s_gifPath[GIF_PATH_1].IsActive(), "GIFpath conflict: Attempted to start PATH3 while PATH1 is already active."); - pxAssertMsg(!s_gifPath[GIF_PATH_2].IsActive(), "GIFpath conflict: Attempted to start PATH3 while PATH2 is already active."); - return s_gifPath[GIF_PATH_3].CopyTag(pMem, size); - - jNO_DEFAULT; - } - - return 0; // unreachable -} - -// Quick version for queuing PATH1 data. -// This version calculates the real length of the packet data only. It does not process -// IRQs or DMA status updates. -__fi int GIFPath_ParseTagQuick(GIF_PATH pathidx, const u8* pMem, u32 size) -{ - int retSize = s_gifPath[pathidx].ParseTagQuick(pathidx, pMem, size); - return retSize; -} - -// Clears all GIFpath data to zero. -void GIFPath_Reset() -{ - for(uint i=0; i<3; ++i ) - s_gifPath.path[i].Reset(); -} - -// This is a hackfix tool provided for "canceling" the contents of the GIFpath when -// invalid GIFdma states are encountered (typically needed for PATH3 only). 
-__fi void GIFPath_Clear( GIF_PATH pathidx ) -{ - memzero(s_gifPath.path[pathidx]); - s_gifPath.path[pathidx].Reset(); - - GSTransferStatus._u32 &= ~(0xf << (pathidx * 4)); - GSTransferStatus._u32 |= (0x5 << (pathidx * 4)); - if( GSgifSoftReset == NULL ) return; - GetMTGS().SendSimplePacket( GS_RINGTYPE_SOFTRESET, (1< + + + + @@ -488,594 +496,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1147,14 +567,6 @@ RelativePath=".\IopSif.cpp" > - - - - @@ -1163,22 +575,6 @@ RelativePath="..\..\R3000A.h" > - - - - - - - - @@ -1259,107 +655,35 @@ > - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + @@ -1793,32 +1117,720 @@ + + - - + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + pState.viBackUp == xReg) { + DevCon.WriteLn(Color_Green, "microVU%d: Loading Branch VI value 
from previous block", getIndex); if (i == 0) warn = 1; infoVar = 1; j = i; i++; @@ -383,6 +396,7 @@ static void analyzeBranchVI(mV, int xReg, bool &infoVar) { j = i; } elif (i == 0) break; + cyc += mVUstall + 1; incPC2(-2); } if (i) { @@ -393,14 +407,14 @@ static void analyzeBranchVI(mV, int xReg, bool &infoVar) { infoVar = 1; } iPC = bPC; - DevCon.WriteLn(Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x]", getIndex, j+1, xPC); + DevCon.WriteLn(Color_Green, "microVU%d: Branch VI-Delay (%d) [%04x][%03d]", getIndex, j+1, xPC, mVU.prog.cur->idx); } else iPC = bPC; } /* // Dead Code... the old version of analyzeBranchVI() -__fi void analyzeBranchVI(mV, int xReg, bool &infoVar) { +__fi void analyzeBranchVI(mV, int xReg, bool& infoVar) { if (!xReg) return; int i; int iEnd = aMin(5, (mVUcount+1)); @@ -469,7 +483,7 @@ __ri int mVUbranchCheck(mV) { __fi void mVUanalyzeCondBranch1(mV, int Is) { analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); - if (!mVUbranchCheck(mVU) && !mVUstall) { + if (!mVUbranchCheck(mVU)) { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); } } @@ -477,7 +491,7 @@ __fi void mVUanalyzeCondBranch1(mV, int Is) { __fi void mVUanalyzeCondBranch2(mV, int Is, int It) { analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]); analyzeVIreg1(mVU, It, mVUlow.VI_read[1]); - if (!mVUbranchCheck(mVU) && !mVUstall) { + if (!mVUbranchCheck(mVU)) { analyzeBranchVI(mVU, Is, mVUlow.memReadIs); analyzeBranchVI(mVU, It, mVUlow.memReadIt); } diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index 89afe47da1..7d215ab42c 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -1,19 +1,16 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2009 Pcsx2 Team +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2010 PCSX2 Dev Team * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your 
option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
*/ #pragma once diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 1008812c72..9a9c0288ca 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -273,17 +273,17 @@ mVUop(mVU_EEXP) { // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2 static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) { - if( x86caps.hasStreamingSIMD4Extensions ) { + if (x86caps.hasStreamingSIMD4Extensions) { xDP.PS(Fs, Fs, 0x71); xMOVSS(PQ, Fs); } else { - SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2 + SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2 xMOVSS (PQ, Fs); // x ^ 2 xPSHUF.D (Fs, Fs, 0xe1); // wzyx -> wzxy - SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 - xPSHUF.D (Fs, Fs, 0xD2); // wzxy -> wxyz - SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2 + SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + xPSHUF.D (Fs, Fs, 0xd2); // wzxy -> wxyz + SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2 } } @@ -319,7 +319,7 @@ mVUop(mVU_ERLENG) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); } pass2 { const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(mVU, xmmPQ, Fs); xSQRT.SS (xmmPQ, xmmPQ); xMOVSSZX (Fs, ptr32[mVUglob.one]); @@ -374,13 +374,6 @@ mVUop(mVU_ESADD) { pass3 { mVUlog("ESADD P"); } } -#define esinHelper(addr) { \ - SSE_MULSS(mVU, t2, t1); \ - xMOVAPS (Fs, t2); \ - xMUL.SS (Fs, ptr32[addr]); \ - SSE_ADDSS(mVU, xmmPQ, Fs); \ -} - mVUop(mVU_ESIN) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); } pass2 { @@ -388,19 +381,27 @@ mVUop(mVU_ESIN) { const xmm& t1 = mVU.regAlloc->allocReg(); const xmm& t2 = mVU.regAlloc->allocReg(); xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - xMOVSS (xmmPQ, Fs); - xMOVAPS (t1, Fs); - SSE_MULSS(mVU, Fs, t1); - xMOVAPS (t2, Fs); - SSE_MULSS(mVU, Fs, t1); - xMOVAPS (t1, Fs); - xMUL.SS (Fs, ptr32[mVUglob.S2]); - SSE_ADDSS(mVU, xmmPQ, Fs); - esinHelper(mVUglob.S3); - esinHelper(mVUglob.S4); - SSE_MULSS(mVU, t2, t1); - xMUL.SS (t2, ptr32[mVUglob.S5]); - SSE_ADDSS(mVU, xmmPQ, t2); + xMOVSS (xmmPQ, Fs); // pq = X + SSE_MULSS(mVU, Fs, Fs); // fs = X^2 + xMOVAPS (t1, Fs); // t1 = X^2 + SSE_MULSS(mVU, Fs, xmmPQ); // fs = X^3 + xMOVAPS (t2, Fs); // t2 = X^3 + xMUL.SS (Fs, ptr32[mVUglob.S2]); // fs = s2 * X^3 + SSE_ADDSS(mVU, xmmPQ, Fs); // pq = X + s2 * X^3 + + SSE_MULSS(mVU, t2, t1); // t2 = X^3 * X^2 + xMOVAPS (Fs, t2); // fs = X^5 + xMUL.SS (Fs, ptr32[mVUglob.S3]); // ps = s3 * X^5 + SSE_ADDSS(mVU, xmmPQ, Fs); // pq = X + s2 * X^3 + s3 * X^5 + + SSE_MULSS(mVU, t2, t1); // t2 = X^5 * X^2 + xMOVAPS (Fs, t2); // fs = X^7 + xMUL.SS (Fs, ptr32[mVUglob.S4]); // fs = s4 * X^7 + SSE_ADDSS(mVU, xmmPQ, Fs); // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7 + + SSE_MULSS(mVU, t2, t1); // t2 = X^7 * X^2 + xMUL.SS (t2, ptr32[mVUglob.S5]); // t2 = s5 * X^9 + SSE_ADDSS(mVU, xmmPQ, t2); // pq = X + s2 * X^3 + s3 * X^5 + s4 * X^7 + s5 * X^9 xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(t1); @@ -1150,8 +1151,8 @@ static __fi void mVU_XGKICK_DELAY(mV, bool memVI) { mVUop(mVU_XGKICK) { pass1 { mVUanalyzeXGkick(mVU, _Is_, mVU_XGKICK_CYCLES); } pass2 { - if (!mVU_XGKICK_CYCLES) { mVU_XGKICK_DELAY(mVU, 0); return; } - else if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); mVUinfo.doXGKICK = 0; } + if (!mVU_XGKICK_CYCLES) { mVU_XGKICK_DELAY(mVU, 0); return; } + elif (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); mVUinfo.doXGKICK = 0; } mVUallocVIa(mVU, gprT1, _Is_); xMOV(ptr32[&mVU.VIxgkick], gprT1); }