From 888a309e1ac6852315e90f0356dca88b0f67f146 Mon Sep 17 00:00:00 2001 From: "gregory.hainaut@gmail.com" Date: Wed, 15 Sep 2010 16:54:19 +0000 Subject: [PATCH] GregMiscellaneous: sync and refresh the branch (3728:3768) git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3769 96395faa-99c1-11dd-bbfe-3dabce05a288 --- cmake/BuildParameters.cmake | 4 +- debian-unstable-upstream/control | 1 - debian-unstable-upstream/control_ppa | 1 - debian-unstable-upstream/rules | 4 + debian-unstable-upstream/rules_fglrx | 4 + pcsx2/CMakeLists.txt | 2 +- pcsx2/COP0.cpp | 12 +- pcsx2/COP0.h | 2 +- pcsx2/Counters.cpp | 8 +- pcsx2/Dump.cpp | 4 +- pcsx2/FiFo.cpp | 16 +- pcsx2/HwRead.cpp | 18 +- pcsx2/HwWrite.cpp | 4 +- pcsx2/IPU/IPUdma.cpp | 5 +- pcsx2/Interpreter.cpp | 2 +- pcsx2/IopCounters.cpp | 2 +- pcsx2/IopDma.cpp | 6 +- pcsx2/Linux/pcsx2.cbp | 1 - pcsx2/R3000A.cpp | 40 +-- pcsx2/R3000A.h | 10 +- pcsx2/R3000AInterpreter.cpp | 12 +- pcsx2/R5900.cpp | 107 ++---- pcsx2/R5900.h | 10 +- pcsx2/SaveState.cpp | 4 +- pcsx2/SaveState.h | 2 +- pcsx2/VUmicro.cpp | 12 +- pcsx2/Vif.cpp | 73 ++-- pcsx2/Vif.h | 2 - pcsx2/Vif1_Dma.cpp | 52 +-- pcsx2/Vif1_MFIFO.cpp | 4 - pcsx2/Vif_Codes.cpp | 24 +- pcsx2/Vif_Dma.h | 18 +- pcsx2/Vif_Transfer.cpp | 2 +- pcsx2/Vif_Unpack.cpp | 329 ++++++------------ pcsx2/Vif_Unpack.h | 36 +- pcsx2/Vif_Unpack.inl | 159 --------- pcsx2/ps2/GIFpath.cpp | 11 +- pcsx2/ps2/Iop/IopHw_Internal.h | 2 +- pcsx2/ps2/eeHwTraceLog.inl | 2 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 - pcsx2/x86/iR3000A.cpp | 44 +-- pcsx2/x86/ix86-32/iR5900-32.cpp | 10 +- pcsx2/x86/newVif.h | 39 +-- pcsx2/x86/newVif_Dynarec.cpp | 103 +++--- pcsx2/x86/newVif_Unpack.cpp | 253 +++++++------- pcsx2/x86/newVif_UnpackSSE.cpp | 25 -- pcsx2/x86/newVif_UnpackSSE.h | 1 - plugins/zzogl-pg/opengl/CMakeLists.txt | 5 + plugins/zzogl-pg/opengl/GLWin32.cpp | 1 + plugins/zzogl-pg/opengl/GLWinX11.cpp | 4 +- plugins/zzogl-pg/opengl/GS.h | 46 ++- plugins/zzogl-pg/opengl/GSmain.cpp | 15 +- 
plugins/zzogl-pg/opengl/Linux/Linux.cpp | 2 - .../opengl/Linux/zzogl-pg/zzogl-pg.cbp | 4 +- plugins/zzogl-pg/opengl/NewRegs.cpp | 2 +- plugins/zzogl-pg/opengl/Regs.cpp | 15 +- plugins/zzogl-pg/opengl/Util.h | 3 +- plugins/zzogl-pg/opengl/Win32/Win32.cpp | 3 +- plugins/zzogl-pg/opengl/Win32/resrc1.h | 13 +- plugins/zzogl-pg/opengl/Win32/zerogs.rc | 285 +++++++++++---- .../opengl/Win32/zerogsogl_2008.vcproj | 16 + plugins/zzogl-pg/opengl/ZZGl.h | 141 -------- plugins/zzogl-pg/opengl/ZZKeyboard.cpp | 33 +- plugins/zzogl-pg/opengl/ZZLog.cpp | 21 +- plugins/zzogl-pg/opengl/ZZLog.h | 3 +- plugins/zzogl-pg/opengl/ZZoglCRTC.cpp | 41 +-- plugins/zzogl-pg/opengl/ZZoglCreate.cpp | 228 +++--------- plugins/zzogl-pg/opengl/ZZoglFlush.cpp | 193 +++++----- plugins/zzogl-pg/opengl/ZZoglShaders.cpp | 157 ++++++++- plugins/zzogl-pg/opengl/ZZoglShaders.h | 243 +++++++++++++ .../opengl/ZeroGSShaders/zerogsshaders.h | 4 +- plugins/zzogl-pg/opengl/targets.cpp | 203 +++++------ plugins/zzogl-pg/opengl/targets.h | 28 +- plugins/zzogl-pg/opengl/zerogs.cpp | 114 ++---- plugins/zzogl-pg/opengl/zerogs.h | 51 +-- 75 files changed, 1562 insertions(+), 1798 deletions(-) delete mode 100644 pcsx2/Vif_Unpack.inl create mode 100644 plugins/zzogl-pg/opengl/ZZoglShaders.h diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index a23bf175e2..a93fc98d79 100644 --- a/cmake/BuildParameters.cmake +++ b/cmake/BuildParameters.cmake @@ -99,7 +99,7 @@ if(DEFINED USER_CMAKE_C_FLAGS) string(STRIP "${USER_CMAKE_C_FLAGS}" CMAKE_C_FLAGS) endif(DEFINED USER_CMAKE_C_FLAGS) # Use some default machine flags -string(STRIP "${CMAKE_C_FLAGS} -m32 -msse -msse2 -march=i686" CMAKE_C_FLAGS) +string(STRIP "${CMAKE_C_FLAGS} -m32 -msse -msse2 -march=i686 -pthread" CMAKE_C_FLAGS) ### C++ flags @@ -110,7 +110,7 @@ if(DEFINED USER_CMAKE_CXX_FLAGS) string(STRIP "${USER_CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS) endif(DEFINED USER_CMAKE_CXX_FLAGS) # Use some default machine flags -string(STRIP "${CMAKE_CXX_FLAGS} -m32 
-msse -msse2 -march=i686" CMAKE_CXX_FLAGS) +string(STRIP "${CMAKE_CXX_FLAGS} -m32 -msse -msse2 -march=i686 -pthread" CMAKE_CXX_FLAGS) #------------------------------------------------------------------------------- # Select library system vs 3rdparty diff --git a/debian-unstable-upstream/control b/debian-unstable-upstream/control index d265f62770..43fa20640f 100644 --- a/debian-unstable-upstream/control +++ b/debian-unstable-upstream/control @@ -21,7 +21,6 @@ Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.15.5.6), cmake (>=2.8), libsoundtouch1-dev (>= 1.3), libsparsehash-dev (>= 1.6), libx11-dev, - libxxf86vm-dev, libglew1.5-dev (>= 1.5.1), libgl1-mesa-dev, libglu1-mesa-dev, diff --git a/debian-unstable-upstream/control_ppa b/debian-unstable-upstream/control_ppa index 109338502a..6a0e8af919 100644 --- a/debian-unstable-upstream/control_ppa +++ b/debian-unstable-upstream/control_ppa @@ -19,7 +19,6 @@ Build-Depends: debhelper (>= 7.0.50), dpkg-dev (>= 1.15.5.6), cmake (>=2.8), libsoundtouch1-dev (>= 1.3), libsparsehash-dev (>= 1.6), libx11-dev, - libxxf86vm-dev, libglew1.5-dev (>= 1.5.1), libgl1-mesa-dev, libglu1-mesa-dev, diff --git a/debian-unstable-upstream/rules b/debian-unstable-upstream/rules index 788f6a4215..3b4533aef0 100755 --- a/debian-unstable-upstream/rules +++ b/debian-unstable-upstream/rules @@ -77,6 +77,10 @@ override_dh_strip: dh_strip --package=pcsx2-unstable --dbg-package=pcsx2-unstable-dbg dh_strip --package=pcsx2-plugins-unstable --dbg-package=pcsx2-plugins-unstable-dbg +# Avoid to relaunch the compilation twice. 
(build and dh_auto_build target) +override_dh_auto_build: + # Do nothing + %: dh $@ --parallel diff --git a/debian-unstable-upstream/rules_fglrx b/debian-unstable-upstream/rules_fglrx index 5e7540a5d5..ebd5629269 100755 --- a/debian-unstable-upstream/rules_fglrx +++ b/debian-unstable-upstream/rules_fglrx @@ -77,6 +77,10 @@ override_dh_strip: dh_strip --package=pcsx2-unstable --dbg-package=pcsx2-unstable-dbg dh_strip --package=pcsx2-plugins-unstable --dbg-package=pcsx2-plugins-unstable-dbg +# Avoid to relaunch the compilation twice. (build and dh_auto_build target) +override_dh_auto_build: + # Do nothing + %: dh $@ --parallel diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 82155f8e83..d3f9f9d370 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -14,6 +14,7 @@ set(CommonFlags -fno-dse -fno-tree-dse -fno-strict-aliasing + -Wstrict-aliasing # Allow to track strict aliasing issue. -pipe -Wno-format -Wno-unused-parameter @@ -225,7 +226,6 @@ set(pcsx2Headers Vif_Dma.h Vif.h Vif_Unpack.h - Vif_Unpack.inl vtlb.h VUflags.h VUmicro.h diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index 8ce8290f29..0911eefde6 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -37,7 +37,8 @@ __ri void UpdateCP0Status() { void __fastcall WriteCP0Status(u32 value) { cpuRegs.CP0.n.Status.val = value; - UpdateCP0Status(); + cpuUpdateOperationMode(); + cpuSetNextEventDelta(4); } void MapTLB(int i) @@ -532,7 +533,8 @@ void ERET() { cpuRegs.pc = cpuRegs.CP0.n.EPC; cpuRegs.CP0.n.Status.b.EXL = 0; } - UpdateCP0Status(); + cpuUpdateOperationMode(); + cpuSetNextEventDelta(4); intSetBranch(); } @@ -540,7 +542,8 @@ void DI() { if (cpuRegs.CP0.n.Status.b._EDI || cpuRegs.CP0.n.Status.b.EXL || cpuRegs.CP0.n.Status.b.ERL || (cpuRegs.CP0.n.Status.b.KSU == 0)) { cpuRegs.CP0.n.Status.b.EIE = 0; - //UpdateCP0Status(); // ints are disabled so checking for them is kinda silly... + // IRQs are disabled so no need to do a cpu exception/event test... 
+ //cpuSetNextEventDelta(); } } @@ -548,7 +551,8 @@ void EI() { if (cpuRegs.CP0.n.Status.b._EDI || cpuRegs.CP0.n.Status.b.EXL || cpuRegs.CP0.n.Status.b.ERL || (cpuRegs.CP0.n.Status.b.KSU == 0)) { cpuRegs.CP0.n.Status.b.EIE = 1; - UpdateCP0Status(); + // schedule an event test, which will check for and raise pending IRQs. + cpuSetNextEventDelta(4); } } diff --git a/pcsx2/COP0.h b/pcsx2/COP0.h index 558eab6170..c2da74fcc3 100644 --- a/pcsx2/COP0.h +++ b/pcsx2/COP0.h @@ -17,7 +17,7 @@ #define __COP0_H__ extern void __fastcall WriteCP0Status(u32 value); -extern void UpdateCP0Status(); +extern void cpuUpdateOperationMode(); extern void WriteTLB(int i); extern void UnmapTLB(int i); extern void MapTLB(int i); diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index 2979eb9219..079cbdacc3 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -93,7 +93,7 @@ static __fi void _rcntSet( int cntidx ) if (c < nextCounter) { nextCounter = c; - cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes + cpuSetNextEvent( nextsCounter, nextCounter ); //Need to update on counter resets/target changes } // Ignore target diff if target is currently disabled. @@ -111,7 +111,7 @@ static __fi void _rcntSet( int cntidx ) if (c < nextCounter) { nextCounter = c; - cpuSetNextBranch( nextsCounter, nextCounter ); //Need to update on counter resets/target changes + cpuSetNextEvent( nextsCounter, nextCounter ); //Need to update on counter resets/target changes } } } @@ -419,7 +419,7 @@ __fi void rcntUpdate_hScanline() { if( !cpuTestCycle( hsyncCounter.sCycle, hsyncCounter.CycleT ) ) return; - //iopBranchAction = 1; + //iopEventAction = 1; if (hsyncCounter.Mode & MODE_HBLANK) { //HBLANK Start rcntStartGate(false, hsyncCounter.sCycle); psxCheckStartGate16(0); @@ -890,6 +890,6 @@ void SaveStateBase::rcntFreeze() for( int i=0; i<4; i++ ) _rcntSetGate( i ); - iopBranchAction = 1; // probably not needed but won't hurt anything either. 
+ iopEventAction = 1; // probably not needed but won't hurt anything either. } } diff --git a/pcsx2/Dump.cpp b/pcsx2/Dump.cpp index 5b08ba515d..4d2324b6f1 100644 --- a/pcsx2/Dump.cpp +++ b/pcsx2/Dump.cpp @@ -55,7 +55,7 @@ void iDumpPsxRegisters(u32 startpc, u32 temp) for(i = 0; i < 34; i+=2) __Log("%spsx%s: %x %x", pstr, disRNameGPR[i], psxRegs.GPR.r[i], psxRegs.GPR.r[i+1]); - DbgCon.WriteLn("%scycle: %x %x %x; counters %x %x", pstr, psxRegs.cycle, g_psxNextBranchCycle, EEsCycle, + DbgCon.WriteLn("%scycle: %x %x %x; counters %x %x", pstr, psxRegs.cycle, g_iopNextEventCycle, EEsCycle, psxNextsCounter, psxNextCounter); DbgCon.WriteLn(wxsFormat(L"psxdma%d ", 2) + hw_dma(2).desc()); @@ -109,7 +109,7 @@ void iDumpRegisters(u32 startpc, u32 temp) __Log("%svfACC: %x %x %x %x", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]); __Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0], cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]); - __Log("%sCycle: %x %x, Count: %x", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count); + __Log("%sCycle: %x %x, Count: %x", pstr, cpuRegs.cycle, g_nextEventCycle, cpuRegs.CP0.n.Count); iDumpPsxRegisters(psxRegs.pc, temp); diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index fd5cc0fce8..2347437b57 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -53,7 +53,7 @@ void __fastcall ReadFIFO_VIF1(mem128_t* out) if (vif1Regs.stat.FQC > 0) { GetMTGS().WaitGS(); - GSreadFIFO(&psHu64(VIF1_FIFO)); + GSreadFIFO((u64*)out); vif1.GSLastDownloadSize--; if (vif1.GSLastDownloadSize <= 16) gifRegs.stat.OPH = false; @@ -61,7 +61,6 @@ void __fastcall ReadFIFO_VIF1(mem128_t* out) } } - CopyQWC( out, &psHu128(VIF1_FIFO) ); VIF_LOG("ReadFIFO/VIF1 -> %ls", out->ToString().c_str()); } @@ -72,8 +71,6 @@ void __fastcall WriteFIFO_VIF0(const mem128_t *value) { VIF_LOG("WriteFIFO/VIF0 <- %ls", value->ToString().c_str()); - CopyQWC(&psHu128(VIF0_FIFO), value); 
- vif0ch.qwc += 1; if(vif0.irqoffset != 0 && vif0.vifstalled == true) DevCon.Warning("Offset on VIF0 FIFO start!"); bool ret = VIF0transfer((u32*)value, 4); @@ -94,8 +91,6 @@ void __fastcall WriteFIFO_VIF1(const mem128_t *value) { VIF_LOG("WriteFIFO/VIF1 <- %ls", value->ToString().c_str()); - CopyQWC(&psHu128(VIF1_FIFO), value); - if (vif1Regs.stat.FDR) DevCon.Warning("writing to fifo when fdr is set!"); if (vif1Regs.stat.test(VIF1_STAT_INT | VIF1_STAT_VSS | VIF1_STAT_VIS | VIF1_STAT_VFS) ) @@ -123,18 +118,15 @@ void __fastcall WriteFIFO_VIF1(const mem128_t *value) pxAssertDev( ret, "vif stall code not implemented" ); } -// Dummy GIF-TAG Packet to Guarantee Count = 1 -__aligned16 u128 nloop0_packet; - void __fastcall WriteFIFO_GIF(const mem128_t *value) { GIF_LOG("WriteFIFO/GIF <- %ls", value->ToString().c_str()); - CopyQWC(&psHu128(GIF_FIFO), value); - CopyQWC(&nloop0_packet, value); + //CopyQWC(&psHu128(GIF_FIFO), value); + //CopyQWC(&nloop0_packet, value); GetMTGS().PrepDataPacket(GIF_PATH_3, 1); - GIFPath_CopyTag( GIF_PATH_3, &nloop0_packet, 1 ); + GIFPath_CopyTag( GIF_PATH_3, value, 1 ); GetMTGS().SendDataPacket(); if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs.stat.APATH == GIF_APATH3 ) { diff --git a/pcsx2/HwRead.cpp b/pcsx2/HwRead.cpp index c20026cee2..7e25c1144a 100644 --- a/pcsx2/HwRead.cpp +++ b/pcsx2/HwRead.cpp @@ -27,8 +27,8 @@ static __fi void IntCHackCheck() { // Sanity check: To protect from accidentally "rewinding" the cyclecount // on the few times nextBranchCycle can be behind our current cycle. 
- s32 diff = g_nextBranchCycle - cpuRegs.cycle; - if( diff > 0 ) cpuRegs.cycle = g_nextBranchCycle; + s32 diff = g_nextEventCycle - cpuRegs.cycle; + if( diff > 0 ) cpuRegs.cycle = g_nextEventCycle; } static const uint HwF_VerboseConLog = 1<<0; @@ -48,7 +48,15 @@ mem32_t __fastcall _hwRead32(u32 mem) case 0x02: return ipuRead32( mem ); - case 0x03: return dmacRead32<0x03>( mem ); + case 0x03: + if (mem >= EEMemoryMap::VIF0_Start) + { + if(mem >= EEMemoryMap::VIF1_Start) + return vifRead32<1>(mem); + else + return vifRead32<0>(mem); + } + return dmacRead32<0x03>( mem ); case 0x04: case 0x05: @@ -65,7 +73,7 @@ mem32_t __fastcall _hwRead32(u32 mem) DevCon.WriteLn( Color_Cyan, "Reading 32-bit FIFO data" ); u128 out128; - _hwRead128(mem, &out128); + _hwRead128(mem & ~0x0f, &out128); return out128._u32[(mem >> 2) & 0x3]; } break; @@ -221,7 +229,7 @@ static void _hwRead64(u32 mem, mem64_t* result ) DevCon.WriteLn( Color_Cyan, "Reading 64-bit FIFO data (%s 64 bits discarded)", wordpart ? "upper" : "lower" ); u128 out128; - _hwRead128(mem, &out128); + _hwRead128(mem & ~0x0f, &out128); *result = out128._u64[wordpart]; } return; diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index 74a92020a2..d1c1b98b17 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -68,7 +68,7 @@ void __fastcall _hwWrite32( u32 mem, u32 value ) zerofill._u32[(mem >> 2) & 0x03] = value; DevCon.WriteLn( Color_Cyan, "Writing 32-bit FIFO data (zero-extended to 128 bits)" ); - _hwWrite128(mem, &zerofill); + _hwWrite128(mem & ~0x0f, &zerofill); } return; @@ -301,7 +301,7 @@ void __fastcall _hwWrite64( u32 mem, const mem64_t* srcval ) u128 zerofill = u128::From32(0); zerofill._u64[(mem >> 3) & 0x01] = *srcval; - hwWrite128(mem, &zerofill); + hwWrite128(mem & ~0x0f, &zerofill); } return; diff --git a/pcsx2/IPU/IPUdma.cpp b/pcsx2/IPU/IPUdma.cpp index f44b1b09b8..5a5949e0b0 100644 --- a/pcsx2/IPU/IPUdma.cpp +++ b/pcsx2/IPU/IPUdma.cpp @@ -381,7 +381,10 @@ __fi void dmaIPU0() // fromIPU ipu0dma.chcr.STR = 
false; hwDmacIrq(DMAC_FROM_IPU); } - IPUProcessInterrupt(); + + //IPUProcessInterrupt(); + extern void IPUWorker(); + if (ipuRegs.ctrl.BUSY) IPUWorker(); } __fi void dmaIPU1() // toIPU diff --git a/pcsx2/Interpreter.cpp b/pcsx2/Interpreter.cpp index cb7e46c283..558ae19842 100644 --- a/pcsx2/Interpreter.cpp +++ b/pcsx2/Interpreter.cpp @@ -375,7 +375,7 @@ static void intReset() static void intEventTest() { // Perform counters, ints, and IOP updates: - _cpuBranchTest_Shared(); + _cpuEventTest_Shared(); } static void intExecute() diff --git a/pcsx2/IopCounters.cpp b/pcsx2/IopCounters.cpp index ebd49f8d29..b6e69c7a37 100644 --- a/pcsx2/IopCounters.cpp +++ b/pcsx2/IopCounters.cpp @@ -391,7 +391,7 @@ void psxRcntUpdate() int i; //u32 change = 0; - g_psxNextBranchCycle = psxRegs.cycle + 32; + g_iopNextEventCycle = psxRegs.cycle + 32; psxNextCounter = 0x7fffffff; psxNextsCounter = psxRegs.cycle; diff --git a/pcsx2/IopDma.cpp b/pcsx2/IopDma.cpp index 238300924f..0225d886be 100644 --- a/pcsx2/IopDma.cpp +++ b/pcsx2/IopDma.cpp @@ -51,10 +51,10 @@ static void __fastcall psxDmaGeneric(u32 madr, u32 bcr, u32 chcr, u32 spuCore, _ if (psxCounters[6].CycleT < psxNextCounter) psxNextCounter = psxCounters[6].CycleT; - if((g_psxNextBranchCycle - psxNextsCounter) > (u32)psxNextCounter) + if((g_iopNextEventCycle - psxNextsCounter) > (u32)psxNextCounter) { - //DevCon.Warning("SPU2async Setting new counter branch, old %x new %x ((%x - %x = %x) > %x delta)", g_psxNextBranchCycle, psxNextsCounter + psxNextCounter, g_psxNextBranchCycle, psxNextsCounter, (g_psxNextBranchCycle - psxNextsCounter), psxNextCounter); - g_psxNextBranchCycle = psxNextsCounter + psxNextCounter; + //DevCon.Warning("SPU2async Setting new counter branch, old %x new %x ((%x - %x = %x) > %x delta)", g_iopNextEventCycle, psxNextsCounter + psxNextCounter, g_iopNextEventCycle, psxNextsCounter, (g_iopNextEventCycle - psxNextsCounter), psxNextCounter); + g_iopNextEventCycle = psxNextsCounter + psxNextCounter; } } diff --git 
a/pcsx2/Linux/pcsx2.cbp b/pcsx2/Linux/pcsx2.cbp index 1a723591f9..b909e2ced0 100644 --- a/pcsx2/Linux/pcsx2.cbp +++ b/pcsx2/Linux/pcsx2.cbp @@ -394,7 +394,6 @@ - diff --git a/pcsx2/R3000A.cpp b/pcsx2/R3000A.cpp index e492bf169e..44f9f20ca5 100644 --- a/pcsx2/R3000A.cpp +++ b/pcsx2/R3000A.cpp @@ -29,22 +29,22 @@ u32 g_psxConstRegs[32]; u32 g_psxHasConstReg, g_psxFlushedConstReg; // Controls when branch tests are performed. -u32 g_psxNextBranchCycle = 0; +u32 g_iopNextEventCycle = 0; // This value is used when the IOP execution is broken to return control to the EE. // (which happens when the IOP throws EE-bound interrupts). It holds the value of -// psxCycleEE (which is set to zero to facilitate the code break), so that the unrun +// iopCycleEE (which is set to zero to facilitate the code break), so that the unrun // cycles can be accounted for later. -s32 psxBreak = 0; +s32 iopBreak = 0; // tracks the IOP's current sync status with the EE. When it dips below zero, // control is returned to the EE. -s32 psxCycleEE = -1; +s32 iopCycleEE = -1; // Used to signal to the EE when important actions that need IOP-attention have // happened (hsyncs, vsyncs, IOP exceptions, etc). IOP runs code whenever this // is true, even if it's already running ahead a bit. -bool iopBranchAction = false; +bool iopEventAction = false; bool iopEventTestIsActive = false; @@ -58,9 +58,9 @@ void psxReset() psxRegs.CP0.n.Status = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1 psxRegs.CP0.n.PRid = 0x0000001f; // PRevID = Revision ID, same as the IOP R3000A - psxBreak = 0; - psxCycleEE = -1; - g_psxNextBranchCycle = psxRegs.cycle + 4; + iopBreak = 0; + iopCycleEE = -1; + g_iopNextEventCycle = psxRegs.cycle + 4; psxHwReset(); @@ -113,8 +113,8 @@ __fi void psxSetNextBranch( u32 startCycle, s32 delta ) // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. 
- if( (int)(g_psxNextBranchCycle - startCycle) > delta ) - g_psxNextBranchCycle = startCycle + delta; + if( (int)(g_iopNextEventCycle - startCycle) > delta ) + g_iopNextEventCycle = startCycle + delta; } __fi void psxSetNextBranchDelta( s32 delta ) @@ -151,13 +151,13 @@ __fi void PSX_INT( IopEventId n, s32 ecycle ) psxSetNextBranchDelta( ecycle ); - if( psxCycleEE < 0 ) + if( iopCycleEE < 0 ) { // The EE called this int, so inform it to branch as needed: // fixme - this doesn't take into account EE/IOP sync (the IOP may be running // ahead or behind the EE as per the EEsCycles value) - s32 iopDelta = (g_psxNextBranchCycle-psxRegs.cycle)*8; - cpuSetNextBranchDelta( iopDelta ); + s32 iopDelta = (g_iopNextEventCycle-psxRegs.cycle)*8; + cpuSetNextEventDelta( iopDelta ); } } @@ -211,18 +211,18 @@ static __fi void _psxTestInterrupts() } } -__ri void psxBranchTest() +__ri void iopEventTest() { if( psxTestCycle( psxNextsCounter, psxNextCounter ) ) { psxRcntUpdate(); - iopBranchAction = true; + iopEventAction = true; } else { // start the next branch at the next counter event by default // the interrupt code below will assign nearer branches if needed. - g_psxNextBranchCycle = psxNextsCounter+psxNextCounter; + g_iopNextEventCycle = psxNextsCounter+psxNextCounter; } @@ -239,7 +239,7 @@ __ri void psxBranchTest() { PSXCPU_LOG("Interrupt: %x %x", psxHu32(0x1070), psxHu32(0x1074)); psxException(0, 0); - iopBranchAction = true; + iopEventAction = true; // No need to execute the SIFhack after cpuExceptions, since these by nature break SIF's // thread sleep hangs and allow the IOP to "come back to life." @@ -258,9 +258,9 @@ void iopTestIntc() // An iop exception has occurred while the EE is running code. 
// Inform the EE to branch so the IOP can handle it promptly: - cpuSetNextBranchDelta( 16 ); - iopBranchAction = true; - //Console.Error( "** IOP Needs an EE EventText, kthx ** %d", psxCycleEE ); + cpuSetNextEventDelta( 16 ); + iopEventAction = true; + //Console.Error( "** IOP Needs an EE EventText, kthx ** %d", iopCycleEE ); // Note: No need to set the iop's branch delta here, since the EE // will run an IOP branch test regardless. diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index ec94d891aa..62b6e6af71 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -117,9 +117,9 @@ struct psxRegisters { extern __aligned16 psxRegisters psxRegs; -extern u32 g_psxNextBranchCycle; -extern s32 psxBreak; // used when the IOP execution is broken and control returned to the EE -extern s32 psxCycleEE; // tracks IOP's current sych status with the EE +extern u32 g_iopNextEventCycle; +extern s32 iopBreak; // used when the IOP execution is broken and control returned to the EE +extern s32 iopCycleEE; // tracks IOP's current sych status with the EE #ifndef _PC_ @@ -172,7 +172,7 @@ extern u32 EEoCycle; extern s32 psxNextCounter; extern u32 psxNextsCounter; -extern bool iopBranchAction; +extern bool iopEventAction; extern bool iopEventTestIsActive; // Branching status used when throwing exceptions. 
@@ -196,7 +196,7 @@ extern R3000Acpu psxRec; extern void psxReset(); extern void __fastcall psxException(u32 code, u32 step); -extern void psxBranchTest(); +extern void iopEventTest(); extern void psxMemReset(); // Subsets diff --git a/pcsx2/R3000AInterpreter.cpp b/pcsx2/R3000AInterpreter.cpp index e65119d7f5..106ae9ed8c 100644 --- a/pcsx2/R3000AInterpreter.cpp +++ b/pcsx2/R3000AInterpreter.cpp @@ -133,7 +133,7 @@ static __fi void execI() psxRegs.pc+= 4; psxRegs.cycle++; - psxCycleEE-=8; + iopCycleEE-=8; psxBSC[psxRegs.code >> 26](); } @@ -147,7 +147,7 @@ static void doBranch(s32 tar) { iopIsDelaySlot = false; psxRegs.pc = branchPC; - psxBranchTest(); + iopEventTest(); } static void intAlloc() { @@ -162,16 +162,16 @@ static void intExecute() { static s32 intExecuteBlock( s32 eeCycles ) { - psxBreak = 0; - psxCycleEE = eeCycles; + iopBreak = 0; + iopCycleEE = eeCycles; - while (psxCycleEE > 0){ + while (iopCycleEE > 0){ branch2 = 0; while (!branch2) { execI(); } } - return psxBreak + psxCycleEE; + return iopBreak + iopCycleEE; } static void intClear(u32 Addr, u32 Size) { diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index 30c7ab6989..1138594ecd 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -71,7 +71,7 @@ void cpuReset() fpuRegs.fprc[0] = 0x00002e00; // fpu Revision.. fpuRegs.fprc[31] = 0x01000001; // fpu Status/Control - g_nextBranchCycle = cpuRegs.cycle + 4; + g_nextEventCycle = cpuRegs.cycle + 4; EEsCycle = 0; EEoCycle = cpuRegs.cycle; @@ -236,21 +236,21 @@ void cpuTestMissingHwInts() { } // sets a branch test to occur some time from an arbitrary starting point. -__fi void cpuSetNextBranch( u32 startCycle, s32 delta ) +__fi void cpuSetNextEvent( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. 
- if( (int)(g_nextBranchCycle - startCycle) > delta ) + if( (int)(g_nextEventCycle - startCycle) > delta ) { - g_nextBranchCycle = startCycle + delta; + g_nextEventCycle = startCycle + delta; } } // sets a branch to occur some time from the current cycle -__fi void cpuSetNextBranchDelta( s32 delta ) +__fi void cpuSetNextEventDelta( s32 delta ) { - cpuSetNextBranch( cpuRegs.cycle, delta ); + cpuSetNextEvent( cpuRegs.cycle, delta ); } // tests the cpu cycle agaisnt the given start and delta values. @@ -264,9 +264,9 @@ __fi int cpuTestCycle( u32 startCycle, s32 delta ) } // tells the EE to run the branch test the next time it gets a chance. -__fi void cpuSetBranch() +__fi void cpuSetEvent() { - g_nextBranchCycle = cpuRegs.cycle; + g_nextEventCycle = cpuRegs.cycle; } __fi void cpuClearInt( uint i ) @@ -285,7 +285,7 @@ static __fi void TESTINT( u8 n, void (*callback)() ) callback(); } else - cpuSetNextBranch( cpuRegs.sCycle[n], cpuRegs.eCycle[n] ); + cpuSetNextEvent( cpuRegs.sCycle[n], cpuRegs.eCycle[n] ); } // [TODO] move this function to LegacyDmac.cpp, and remove most of the DMAC-related headers from @@ -330,7 +330,7 @@ static __fi void _cpuTestTIMR() s_iLastCOP0Cycle = cpuRegs.cycle; // fixme: this looks like a hack to make up for the fact that the TIMR - // doesn't yet have a proper mechanism for setting itself up on a nextBranchCycle. + // doesn't yet have a proper mechanism for setting itself up on a nextEventCycle. // A proper fix would schedule the TIMR to trigger at a specific cycle anytime // the Count or Compare registers are modified. 
@@ -365,15 +365,15 @@ static bool cpuIntsEnabled(int Interrupt) !cpuRegs.CP0.n.Status.b.EXL && (cpuRegs.CP0.n.Status.b.ERL == 0) && IntType; } -// if cpuRegs.cycle is greater than this cycle, should check cpuBranchTest for updates -u32 g_nextBranchCycle = 0; +// if cpuRegs.cycle is greater than this cycle, should check cpuEventTest for updates +u32 g_nextEventCycle = 0; // Shared portion of the branch test, called from both the Interpreter // and the recompiler. (moved here to help alleviate redundant code) -__fi void _cpuBranchTest_Shared() +__fi void _cpuEventTest_Shared() { ScopedBool etest(eeEventTestIsActive); - g_nextBranchCycle = cpuRegs.cycle + eeWaitCycles; + g_nextEventCycle = cpuRegs.cycle + eeWaitCycles; // ---- Counters ------------- // Important: the vsync counter must be the first to be checked. It includes emulation @@ -397,23 +397,23 @@ __fi void _cpuBranchTest_Shared() _cpuTestInterrupts(); // ---- IOP ------------- - // * It's important to run a psxBranchTest before calling ExecuteBlock. This + // * It's important to run a iopEventTest before calling ExecuteBlock. This // is because the IOP does not always perform branch tests before returning // (during the prev branch) and also so it can act on the state the EE has // given it before executing any code. // // * The IOP cannot always be run. If we run IOP code every time through the - // cpuBranchTest, the IOP generally starts to run way ahead of the EE. + // cpuEventTest, the IOP generally starts to run way ahead of the EE. EEsCycle += cpuRegs.cycle - EEoCycle; EEoCycle = cpuRegs.cycle; if( EEsCycle > 0 ) - iopBranchAction = true; + iopEventAction = true; - psxBranchTest(); + iopEventTest(); - if( iopBranchAction ) + if( iopEventAction ) { //if( EEsCycle < -450 ) // Console.WriteLn( " IOP ahead by: %d cycles", -EEsCycle ); @@ -424,34 +424,11 @@ __fi void _cpuBranchTest_Shared() // run closely in sync during raised exception events. But in practice it didn't // seem to make much of a difference. 
- // Note: The IOP is very good about chaining blocks together so it tends to - // run lots of cycles, even with only 32 (4 IOP) cycles specified here. That's - // probably why it doesn't improve sync much. - - /*bool eeExceptPending = cpuIntsEnabled() && - //( cpuRegs.CP0.n.Status.b.EIE && cpuRegs.CP0.n.Status.b.IE && (cpuRegs.CP0.n.Status.b.ERL == 0) ) && - //( (cpuRegs.CP0.n.Status.val & 0x10007) == 0x10001 ) && - ( (cpuRegs.interrupt & (3<<30)) != 0 ); - - if( eeExceptPending ) - { - // ExecuteBlock returns a negative value, so subtract it from the cycle count - // specified to get the total cycles processed! :D - int cycleCount = std::min( EEsCycle, (s32)(eeWaitCycles>>4) ); - int cyclesRun = cycleCount - psxCpu->ExecuteBlock( cycleCount ); - EEsCycle -= cyclesRun; - //Console.Warning( "IOP Exception-Pending Execution -- EEsCycle: %d", EEsCycle ); - } - else*/ - { - EEsCycle = psxCpu->ExecuteBlock( EEsCycle ); - } - - iopBranchAction = false; + iopEventAction = false; } // ---- VU0 ------------- - // We're in a BranchTest. All dynarec registers are flushed + // We're in a EventTest. All dynarec registers are flushed // so there is no need to freeze registers here. CpuVU0->ExecuteBlock(); @@ -466,19 +443,19 @@ __fi void _cpuBranchTest_Shared() // EE's running way ahead of the IOP still, so we should branch quickly to give the // IOP extra timeslices in short order. - cpuSetNextBranchDelta( 48 ); - //Console.Warning( "EE ahead of the IOP -- Rapid Branch! %d", EEsCycle ); + cpuSetNextEventDelta( 48 ); + //Console.Warning( "EE ahead of the IOP -- Rapid Event! 
%d", EEsCycle ); } // The IOP could be running ahead/behind of us, so adjust the iop's next branch by its // relative position to the EE (via EEsCycle) - cpuSetNextBranchDelta( ((g_psxNextBranchCycle-psxRegs.cycle)*8) - EEsCycle ); + cpuSetNextEventDelta( ((g_iopNextEventCycle-psxRegs.cycle)*8) - EEsCycle ); // Apply the hsync counter's nextCycle - cpuSetNextBranch( hsyncCounter.sCycle, hsyncCounter.CycleT ); + cpuSetNextEvent( hsyncCounter.sCycle, hsyncCounter.CycleT ); // Apply vsync and other counter nextCycles - cpuSetNextBranch( nextsCounter, nextCounter ); + cpuSetNextEvent( nextsCounter, nextCounter ); // ---- INTC / DMAC Exceptions ----------------- // Raise the INTC and DMAC interrupts here, which usually throw exceptions. @@ -501,15 +478,11 @@ __ri void cpuTestINTCInts() if( (psHu32(INTC_STAT) & psHu32(INTC_MASK)) == 0 ) return; - cpuRegs.interrupt|= 1 << 30; - cpuRegs.sCycle[30] = cpuRegs.cycle; - cpuRegs.eCycle[30] = 4; //Needs to be 4 to account for bus delays/pipelines etc - - cpuSetNextBranchDelta( 4 ); - if(eeEventTestIsActive && (psxCycleEE > 0)) + cpuSetNextEventDelta( 4 ); + if(eeEventTestIsActive && (iopCycleEE > 0)) { - psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run. - psxCycleEE = 0; + iopBreak += iopCycleEE; // record the number of cycles the IOP didn't run. + iopCycleEE = 0; } } @@ -525,15 +498,11 @@ __fi void cpuTestDMACInts() if ( ( (psHu16(0xe012) & psHu16(0xe010)) == 0) && ( (psHu16(0xe010) & 0x8000) == 0) ) return; - cpuRegs.interrupt|= 1 << 31; - cpuRegs.sCycle[31] = cpuRegs.cycle; - cpuRegs.eCycle[31] = 4; //Needs to be 4 to account for bus delays/pipelines etc - - cpuSetNextBranchDelta( 4 ); - if(eeEventTestIsActive && (psxCycleEE > 0)) + cpuSetNextEventDelta( 4 ); + if(eeEventTestIsActive && (iopCycleEE > 0)) { - psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run. - psxCycleEE = 0; + iopBreak += iopCycleEE; // record the number of cycles the IOP didn't run. 
+ iopCycleEE = 0; } } @@ -567,16 +536,16 @@ __fi void CPU_INT( EE_EventType n, s32 ecycle) // Interrupt is happening soon: make sure both EE and IOP are aware. - if( ecycle <= 28 && psxCycleEE > 0 ) + if( ecycle <= 28 && iopCycleEE > 0 ) { // If running in the IOP, force it to break immediately into the EE. // the EE's branch test is due to run. - psxBreak += psxCycleEE; // record the number of cycles the IOP didn't run. - psxCycleEE = 0; + iopBreak += iopCycleEE; // record the number of cycles the IOP didn't run. + iopCycleEE = 0; } - cpuSetNextBranchDelta( cpuRegs.eCycle[n] ); + cpuSetNextEventDelta( cpuRegs.eCycle[n] ); } // Called from recompilers; __fastcall define is mandatory. diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index 1793e01406..eac96559b3 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -244,7 +244,7 @@ extern __aligned16 cpuRegisters cpuRegs; extern __aligned16 fpuRegisters fpuRegs; extern __aligned16 tlbs tlb[48]; -extern u32 g_nextBranchCycle; +extern u32 g_nextEventCycle; extern bool eeEventTestIsActive; extern u32 s_iLastCOP0Cycle; extern u32 s_iLastPERFCycle[2]; @@ -415,12 +415,12 @@ extern void cpuTlbMissW(u32 addr, u32 bd); extern void cpuTestHwInts(); extern void cpuClearInt(uint n); -extern void cpuSetNextBranch( u32 startCycle, s32 delta ); -extern void cpuSetNextBranchDelta( s32 delta ); +extern void cpuSetNextEvent( u32 startCycle, s32 delta ); +extern void cpuSetNextEventDelta( s32 delta ); extern int cpuTestCycle( u32 startCycle, s32 delta ); -extern void cpuSetBranch(); +extern void cpuSetEvent(); -extern void _cpuBranchTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900: +extern void _cpuEventTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900: extern void cpuTestINTCInts(); extern void cpuTestDMACInts(); diff --git a/pcsx2/SaveState.cpp b/pcsx2/SaveState.cpp index 4967971662..26f6cbe57b 100644 --- a/pcsx2/SaveState.cpp +++ b/pcsx2/SaveState.cpp @@ -179,8 +179,8 @@ void 
SaveStateBase::FreezeRegisters() FreezeTag( "Cycles" ); Freeze(EEsCycle); Freeze(EEoCycle); - Freeze(g_nextBranchCycle); - Freeze(g_psxNextBranchCycle); + Freeze(g_nextEventCycle); + Freeze(g_iopNextEventCycle); Freeze(s_iLastCOP0Cycle); Freeze(s_iLastPERFCycle); diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index f1bcda1950..bf506a3d58 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -24,7 +24,7 @@ // the lower 16 bit value. IF the change is breaking of all compatibility with old // states, increment the upper 16 bit value, and clear the lower 16 bits to 0. -static const u32 g_SaveVersion = 0x8b490000; +static const u32 g_SaveVersion = 0x8b4a0000; // this function is meant to be used in the place of GSfreeze, and provides a safe layer // between the GS saving function and the MTGS's needs. :) diff --git a/pcsx2/VUmicro.cpp b/pcsx2/VUmicro.cpp index 724d591e4e..480b91cd5e 100644 --- a/pcsx2/VUmicro.cpp +++ b/pcsx2/VUmicro.cpp @@ -33,7 +33,7 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { // Let VUs run behind EE instead of ahead if (stat & test) { - cpuSetNextBranchDelta((s+c)*2); + cpuSetNextEventDelta((s+c)*2); m_lastEEcycles = cpuRegs.cycle + (s*2); } } @@ -43,11 +43,11 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { delta >>= 1; // Divide by 2 (unsigned) Execute(delta); // Execute the time since the last call if (stat & test) { - cpuSetNextBranchDelta(c*2); + cpuSetNextEventDelta(c*2); m_lastEEcycles = cpuRegs.cycle; } } - else cpuSetNextBranchDelta(-delta); // Haven't caught-up from kick start + else cpuSetNextEventDelta(-delta); // Haven't caught-up from kick start } } @@ -63,7 +63,7 @@ void __fastcall BaseVUmicroCPU::ExecuteBlockJIT(BaseVUmicroCPU* cpu) { cpu->Execute(c); // Execute VU if (stat & test) { cpu->m_lastEEcycles+=(c*2); - cpuSetNextBranchDelta(c*2); + cpuSetNextEventDelta(c*2); } } } @@ -80,7 +80,7 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { // If the VU0 program didn't finish then we'll want to finish it up // 
pretty soon. This fixes vmhacks in some games (Naruto Ultimate Ninja 2) if(VU0.VI[REG_VPU_STAT].UL & vuRunning) - cpuSetNextBranchDelta( 192 ); // fixme : ideally this should be higher, like 512 or so. + cpuSetNextEventDelta( 192 ); // fixme : ideally this should be higher, like 512 or so. } else { Execute(vu0RunCycles); @@ -89,7 +89,7 @@ void BaseVUmicroCPU::ExecuteBlock(bool startUp) { // This helps keep the EE and VU0 in sync. // Check Silver Surfer. Currently has SPS varying with different branch deltas set below. if(VU0.VI[REG_VPU_STAT].UL & vuRunning) - cpuSetNextBranchDelta( 768 ); + cpuSetNextEventDelta( 768 ); } } diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 183579b978..f2d0b7da1c 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -21,8 +21,8 @@ #include "GS.h" #include "Gif.h" -vifStruct vif0; -vifStruct vif1; +__aligned16 vifStruct vif0, vif1; + tGSTransferStatus GSTransferStatus((STOPPED_MODE<<8) | (STOPPED_MODE<<4) | STOPPED_MODE); void vif0Reset() @@ -31,14 +31,6 @@ void vif0Reset() memzero(vif0); memzero(vif0Regs); - psHu64(VIF0_FIFO) = 0; - psHu64(VIF0_FIFO + 8) = 0; - - vif0Regs.stat.VPS = VPS_IDLE; - vif0Regs.stat.FQC = 0; - - vif0.done = false; - resetNewVif(0); } @@ -48,15 +40,6 @@ void vif1Reset() memzero(vif1); memzero(vif1Regs); - psHu64(VIF1_FIFO) = 0; - psHu64(VIF1_FIFO + 8) = 0; - - vif1Regs.stat.VPS = VPS_IDLE; - vif1Regs.stat.FQC = 0; // FQC=0 - - vif1.done = false; - cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's - resetNewVif(1); } @@ -64,7 +47,6 @@ void SaveStateBase::vif0Freeze() { FreezeTag("VIFdma"); Freeze(g_vifCycles); // Dunno if this one is needed, but whatever, it's small. :) - Freeze(g_vifmask); // mask settings for VIF0 and VIF1 Freeze(vif0); Freeze(nVif[0].bSize); @@ -153,6 +135,7 @@ __fi void vif1FBRST(u32 value) { if (FBRST(value).RST) // Reset Vif. { memzero(vif1); + //cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's vif1ch.qwc -= min((int)vif1ch.qwc, 16); //? 
psHu64(VIF1_FIFO) = 0; @@ -277,9 +260,29 @@ __fi void vif1STAT(u32 value) { #define caseVif(x) (idx ? VIF1_##x : VIF0_##x) +_vifT __fi u32 vifRead32(u32 mem) { + vifStruct& vif = GetVifX; + + switch (mem) { + case caseVif(ROW0): return vif.MaskRow._u32[0]; + case caseVif(ROW1): return vif.MaskRow._u32[1]; + case caseVif(ROW2): return vif.MaskRow._u32[2]; + case caseVif(ROW3): return vif.MaskRow._u32[3]; + + case caseVif(COL0): return vif.MaskCol._u32[0]; + case caseVif(COL1): return vif.MaskCol._u32[1]; + case caseVif(COL2): return vif.MaskCol._u32[2]; + case caseVif(COL3): return vif.MaskCol._u32[3]; + } + + return psHu32(mem); +} + // returns FALSE if no writeback is needed (or writeback is handled internally) // returns TRUE if the caller should writeback the value to the eeHw register map. _vifT __fi bool vifWrite32(u32 mem, u32 value) { + vifStruct& vif = GetVifX; + switch (mem) { case caseVif(MARK): VIF_LOG("VIF%d_MARK write32 0x%8.8x", idx, value); @@ -303,33 +306,23 @@ _vifT __fi bool vifWrite32(u32 mem, u32 value) { // standard register writes -- handled by caller. break; - case caseVif(ROW0): - case caseVif(ROW1): - case caseVif(ROW2): - case caseVif(ROW3): - // Here's a neat way to obfuscate code. This is a super-fancy-complicated version - // of a standard psHu32(mem) = value; writeback. Handled by caller for us, thanks! --air - //if (!idx) g_vifmask.Row0[ (mem>>4)&3 ] = value; - //else g_vifmask.Row1[ (mem>>4)&3 ] = value; - //((u32*)&vifXRegs.r0) [((mem>>4)&3)*4] = value; - break; + case caseVif(ROW0): vif.MaskRow._u32[0] = value; return false; + case caseVif(ROW1): vif.MaskRow._u32[1] = value; return false; + case caseVif(ROW2): vif.MaskRow._u32[2] = value; return false; + case caseVif(ROW3): vif.MaskRow._u32[3] = value; return false; - case caseVif(COL0): - case caseVif(COL1): - case caseVif(COL2): - case caseVif(COL3): - // Here's a neat way to obfuscate code. 
This is a super-fancy-complicated version - // of a standard psHu32(mem) = value; writeback. Handled by caller for us, thanks! --air - //if (!idx) g_vifmask.Col0[ (mem>>4)&3 ] = value; - //else g_vifmask.Col1[ (mem>>4)&3 ] = value; - //((u32*)&vifXRegs.c0) [((mem>>4)&3)*4] = value; - break; + case caseVif(COL0): vif.MaskCol._u32[0] = value; return false; + case caseVif(COL1): vif.MaskCol._u32[1] = value; return false; + case caseVif(COL2): vif.MaskCol._u32[2] = value; return false; + case caseVif(COL3): vif.MaskCol._u32[3] = value; return false; } // fall-through case: issue standard writeback behavior. return true; } +template u32 vifRead32<0>(u32 mem); +template u32 vifRead32<1>(u32 mem); template bool vifWrite32<0>(u32 mem, u32 value); template bool vifWrite32<1>(u32 mem, u32 value); diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 93381f0065..1110559a4e 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -213,8 +213,6 @@ struct VIFregisters { u32 addr; }; -extern VIFregisters *vifRegs; - static VIFregisters& vif0Regs = (VIFregisters&)eeHw[0x3800]; static VIFregisters& vif1Regs = (VIFregisters&)eeHw[0x3C00]; diff --git a/pcsx2/Vif1_Dma.cpp b/pcsx2/Vif1_Dma.cpp index 7b7ff73bb9..a651a8d89f 100644 --- a/pcsx2/Vif1_Dma.cpp +++ b/pcsx2/Vif1_Dma.cpp @@ -58,7 +58,7 @@ __fi void vif1FLUSH() void vif1TransferToMemory() { u32 size; - u64* pMem = (u64*)dmaGetAddr(vif1ch.madr, false); + u128* pMem = (u128*)dmaGetAddr(vif1ch.madr, false); // VIF from gsMemory if (pMem == NULL) //Is vif0ptag empty? @@ -78,54 +78,34 @@ void vif1TransferToMemory() // completely and execute the transfer there-after. 
//Console.Warning("Real QWC %x", vif1ch.qwc); size = min((u32)vif1ch.qwc, vif1.GSLastDownloadSize); + const u128* pMemEnd = pMem + vif1.GSLastDownloadSize; if (GSreadFIFO2 == NULL) { for (;size > 0; --size) { GetMTGS().WaitGS(); - GSreadFIFO(&psHu64(VIF1_FIFO)); - - pMem[0] = psHu64(VIF1_FIFO); - pMem[1] = psHu64(VIF1_FIFO + 8); - pMem += 2; - } - if(vif1ch.qwc > vif1.GSLastDownloadSize) - { - DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space"); - for (size = vif1ch.qwc - vif1.GSLastDownloadSize; size > 0; --size) - { - psHu64(VIF1_FIFO) = 0; - psHu64(VIF1_FIFO + 8) = 0; - pMem[0] = psHu64(VIF1_FIFO); - pMem[1] = psHu64(VIF1_FIFO + 8); - pMem += 2; - } + GSreadFIFO((u64*)pMem); + ++pMem; } } else { GetMTGS().WaitGS(); - GSreadFIFO2(pMem, size); - - // set incase read - psHu64(VIF1_FIFO) = pMem[2*size-2]; - psHu64(VIF1_FIFO + 8) = pMem[2*size-1]; - pMem += size * 2; - if(vif1ch.qwc > vif1.GSLastDownloadSize) - { - DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space"); - for (size = vif1ch.qwc - vif1.GSLastDownloadSize; size > 0; --size) - { - psHu64(VIF1_FIFO) = 0; - psHu64(VIF1_FIFO + 8) = 0; - pMem[0] = psHu64(VIF1_FIFO); - pMem[1] = psHu64(VIF1_FIFO + 8); - pMem += 2; - } - } + GSreadFIFO2((u64*)pMem, size); + pMem += size; } + if(pMem < pMemEnd) + { + DevCon.Warning("GS Transfer < VIF QWC, Clearing end of space"); + + __m128 zeroreg = _mm_setzero_ps(); + do { + _mm_store_ps((float*)pMem, zeroreg); + ++pMem; + } while (pMem < pMemEnd); + } g_vifCycles += vif1ch.qwc * 2; vif1ch.madr += vif1ch.qwc * 16; // mgs3 scene changes diff --git a/pcsx2/Vif1_MFIFO.cpp b/pcsx2/Vif1_MFIFO.cpp index 490b00dd4c..18817d9ced 100644 --- a/pcsx2/Vif1_MFIFO.cpp +++ b/pcsx2/Vif1_MFIFO.cpp @@ -19,16 +19,12 @@ #include "Gif.h" #include "Vif_Dma.h" -VIFregisters *vifRegs; -vifStruct *vif; u16 vifqwc = 0; u32 g_vifCycles = 0; u32 g_vu0Cycles = 0; u32 g_vu1Cycles = 0; u32 g_packetsizeonvu = 0; -__aligned16 VifMaskTypes g_vifmask; - extern u32 g_vifCycles; static u32 
qwctag(u32 mask) diff --git a/pcsx2/Vif_Codes.cpp b/pcsx2/Vif_Codes.cpp index dff45af4f6..aa91c777bf 100644 --- a/pcsx2/Vif_Codes.cpp +++ b/pcsx2/Vif_Codes.cpp @@ -38,8 +38,7 @@ static __fi void vifFlush(int idx) { } static __fi void vuExecMicro(int idx, u32 addr) { - VURegs* VU = nVif[idx].VU; - VIFregisters& vifRegs = VU->GetVifRegs(); + VIFregisters& vifRegs = vifXRegs; int startcycles = 0; //vifFlush(idx); @@ -423,7 +422,7 @@ vifOp(vifCode_Offset) { return 0; } -template static __fi int _vifCode_STColRow(const u32* data, u32* pmem1, u32* pmem2) { +template static __fi int _vifCode_STColRow(const u32* data, u32* pmem2) { vifStruct& vifX = GetVifX; int ret = min(4 - vifX.tag.addr, vifX.vifpacketsize); @@ -432,16 +431,12 @@ template static __fi int _vifCode_STColRow(const u32* data, u32* pmem1, switch (ret) { case 4: - pmem1[12] = data[3]; pmem2[3] = data[3]; case 3: - pmem1[8] = data[2]; pmem2[2] = data[2]; case 2: - pmem1[4] = data[1]; pmem2[1] = data[1]; case 1: - pmem1[0] = data[0]; pmem2[0] = data[0]; break; jNO_DEFAULT @@ -462,10 +457,7 @@ vifOp(vifCode_STCol) { return 1; } pass2 { - u32* cols = idx ? g_vifmask.Col1 : g_vifmask.Col0; - u32* pmem1 = &vifXRegs.c0 + (vifX.tag.addr << 2); - u32* pmem2 = cols + vifX.tag.addr; - return _vifCode_STColRow(data, pmem1, pmem2); + return _vifCode_STColRow(data, &vifX.MaskCol._u32[vifX.tag.addr]); } pass3 { VifCodeLog("STCol"); } return 0; @@ -480,10 +472,7 @@ vifOp(vifCode_STRow) { return 1; } pass2 { - u32* rows = idx ? 
g_vifmask.Row1 : g_vifmask.Row0; - u32* pmem1 = &vifXRegs.r0 + (vifX.tag.addr << 2); - u32* pmem2 = rows + vifX.tag.addr; - return _vifCode_STColRow(data, pmem1, pmem2); + return _vifCode_STColRow(data, &vifX.MaskRow._u32[vifX.tag.addr]); } pass3 { VifCodeLog("STRow"); } return 0; @@ -516,11 +505,10 @@ vifOp(vifCode_STMod) { vifOp(vifCode_Unpack) { pass1 { - if (!idx) vifUnpackSetup<0>(data); - else vifUnpackSetup<1>(data); + vifUnpackSetup(data); return 1; } - pass2 { return nVifUnpack(idx, (u8*)data); } + pass2 { return nVifUnpack((u8*)data); } pass3 { VifCodeLog("Unpack"); } return 0; } diff --git a/pcsx2/Vif_Dma.h b/pcsx2/Vif_Dma.h index 402d473d3e..4d9a301b79 100644 --- a/pcsx2/Vif_Dma.h +++ b/pcsx2/Vif_Dma.h @@ -56,6 +56,8 @@ union tTRXREG { // NOTE, if debugging vif stalls, use sega classics, spyro, gt4, and taito struct vifStruct { + u128 MaskRow, MaskCol; + vifCode tag; int cmd; int irq; @@ -82,10 +84,10 @@ struct vifStruct { u8 GifWaitState; // 0 = General PATH checking, 1 = Flush path 3, 2 == Wait for VU1 }; -extern vifStruct* vif; -extern vifStruct vif0, vif1; +extern __aligned16 vifStruct vif0, vif1; extern u8 schedulepath3msk; +_vifT extern u32 vifRead32(u32 mem); _vifT extern bool vifWrite32(u32 mem, u32 value); extern void vif0Interrupt(); @@ -122,15 +124,3 @@ extern u32 g_vu1Cycles; extern u32 g_packetsizeonvu; extern void vif0FLUSH(); extern void vif1FLUSH(); - -//------------------------------------------------------------------ -// newVif SSE-optimized Row/Col Structs -//------------------------------------------------------------------ - -struct VifMaskTypes -{ - u32 Row0[4], Col0[4]; - u32 Row1[4], Col1[4]; -}; - -extern __aligned16 VifMaskTypes g_vifmask; // This struct is used by newVif diff --git a/pcsx2/Vif_Transfer.cpp b/pcsx2/Vif_Transfer.cpp index d48a00d872..18a8428142 100644 --- a/pcsx2/Vif_Transfer.cpp +++ b/pcsx2/Vif_Transfer.cpp @@ -25,7 +25,7 @@ // Doesn't stall if the next vifCode is the Mark command _vifT bool runMark(u32* 
&data) { if (((vifXRegs.code >> 24) & 0x7f) == 0x7) { - Console.WriteLn("Vif%d: Running Mark with I-bit", idx); + DevCon.WriteLn("Vif%d: Running Mark with I-bit", idx); return 1; // No Stall? } return 1; // Stall diff --git a/pcsx2/Vif_Unpack.cpp b/pcsx2/Vif_Unpack.cpp index c407bc977a..76bc1f60fa 100644 --- a/pcsx2/Vif_Unpack.cpp +++ b/pcsx2/Vif_Unpack.cpp @@ -25,212 +25,99 @@ enum UnpackOffset { OFFSET_W = 3 }; -static __fi u32 setVifRowRegs(u32 reg, u32 data) { - switch (reg) { - case 0: vifRegs->r0 = data; break; - case 1: vifRegs->r1 = data; break; - case 2: vifRegs->r2 = data; break; - case 3: vifRegs->r3 = data; break; - jNO_DEFAULT; - } +static __fi u32 setVifRow(vifStruct& vif, u32 reg, u32 data) { + vif.MaskRow._u32[reg] = data; return data; } -static __fi u32 getVifRowRegs(u32 reg) { - switch (reg) { - case 0: return vifRegs->r0; break; - case 1: return vifRegs->r1; break; - case 2: return vifRegs->r2; break; - case 3: return vifRegs->r3; break; - jNO_DEFAULT; - } - return 0; // unreachable... -} - -static __fi u32 getVifColRegs(u32 reg) { - switch (reg) { - case 0: return vifRegs->c0; break; - case 1: return vifRegs->c1; break; - case 2: return vifRegs->c2; break; - default: return vifRegs->c3; break; - } - return 0; // unreachable... 
-} - -template< bool doMask > +// cycle derives from vif.cl +// mode derives from vifRegs.mode +template< uint idx, uint mode, bool doMask > static __ri void writeXYZW(u32 offnum, u32 &dest, u32 data) { - u32 vifRowReg = getVifRowRegs(offnum); int n = 0; + vifStruct& vif = GetVifX; + if (doMask) { - switch (vif->cl) { - case 0: n = (vifRegs->mask >> (offnum * 2)) & 0x3; break; - case 1: n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; - case 2: n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; - default: n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; + const VIFregisters& regs = vifXRegs; + switch (vif.cl) { + case 0: n = (regs.mask >> (offnum * 2)) & 0x3; break; + case 1: n = (regs.mask >> ( 8 + (offnum * 2))) & 0x3; break; + case 2: n = (regs.mask >> (16 + (offnum * 2))) & 0x3; break; + default: n = (regs.mask >> (24 + (offnum * 2))) & 0x3; break; } } + // Four possible types of masking are handled below: + // 0 - Data + // 1 - MaskRow + // 2 - MaskCol + // 3 - Write protect + switch (n) { case 0: - if ((vif->cmd & 0x6F) != 0x6f) { - switch (vifRegs->mode) { - case 1: dest = data + vifRowReg; break; - case 2: dest = setVifRowRegs(offnum, vifRowReg + data); break; - default: dest = data; break; - } + switch (mode) { + case 1: dest = data + vif.MaskRow._u32[offnum]; break; + case 2: dest = setVifRow(vif, offnum, vif.MaskRow._u32[offnum] + data); break; + default: dest = data; break; } - else dest = data; // v4-5 Unpack Mode break; - case 1: dest = vifRowReg; break; - case 2: dest = getVifColRegs(vif->cl); break; + case 1: dest = vif.MaskRow._u32[offnum]; break; + case 2: dest = vif.MaskCol._u32[min(vif.cl,3)]; break; case 3: break; } } +#define tParam idx,mode,doMask -template < bool doMask, class T > -static __fi void __fastcall UNPACK_S(u32 *dest, const T *data, int size) +template < uint idx, uint mode, bool doMask, class T > +static void __fastcall UNPACK_S(u32* dest, const T* src) { + u32 data = *src; + //S-# will always be a complete 
packet, no matter what. So we can skip the offset bits - writeXYZW(OFFSET_X, *dest++, *data); - writeXYZW(OFFSET_Y, *dest++, *data); - writeXYZW(OFFSET_Z, *dest++, *data); - writeXYZW(OFFSET_W, *dest , *data); + writeXYZW(OFFSET_X, *(dest+0), data); + writeXYZW(OFFSET_Y, *(dest+1), data); + writeXYZW(OFFSET_Z, *(dest+2), data); + writeXYZW(OFFSET_W, *(dest+3), data); } -template -static __ri void __fastcall UNPACK_V2(u32 *dest, const T *data, int size) +// The PS2 console actually writes v1v0v1v0 for all V2 unpacks -- the second v1v0 pair +// being officially "indeterminate" but some games very much depend on it. +template < uint idx, uint mode, bool doMask, class T > +static void __fastcall UNPACK_V2(u32* dest, const T* src) { - if (vifRegs->offset == OFFSET_X) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_Y; - size--; - } - } - - if (vifRegs->offset == OFFSET_Y) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data); - vifRegs->offset = OFFSET_Z; - size--; - } - } - - if (vifRegs->offset == OFFSET_Z) - { - writeXYZW(vifRegs->offset, *dest++, *dest-2); - vifRegs->offset = OFFSET_W; - } - - if (vifRegs->offset == OFFSET_W) - { - writeXYZW(vifRegs->offset, *dest, *data); - vifRegs->offset = OFFSET_X; - } + writeXYZW(OFFSET_X, *(dest+0), *(src+0)); + writeXYZW(OFFSET_Y, *(dest+1), *(src+1)); + writeXYZW(OFFSET_Z, *(dest+2), *(src+0)); + writeXYZW(OFFSET_W, *(dest+3), *(src+1)); } -template -static __ri void __fastcall UNPACK_V3(u32 *dest, const T *data, int size) +// V3 and V4 unpacks both use the V4 unpack logic, even though most of the OFFSET_W fields +// during V3 unpacking end up being overwritten by the next unpack. This is confirmed real +// hardware behavior that games such as Ape Escape 3 depend on. 
+template < uint idx, uint mode, bool doMask, class T > +static void __fastcall UNPACK_V4(u32* dest, const T* src) { - if(vifRegs->offset == OFFSET_X) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_Y; - size--; - } - } - - if(vifRegs->offset == OFFSET_Y) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_Z; - size--; - } - } - - if(vifRegs->offset == OFFSET_Z) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_W; - size--; - } - } - - if(vifRegs->offset == OFFSET_W) - { - // V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!) - // Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate - writeXYZW(vifRegs->offset, *dest, *data); - vifRegs->offset = OFFSET_X; - } + writeXYZW(OFFSET_X, *(dest+0), *(src+0)); + writeXYZW(OFFSET_Y, *(dest+1), *(src+1)); + writeXYZW(OFFSET_Z, *(dest+2), *(src+2)); + writeXYZW(OFFSET_W, *(dest+3), *(src+3)); } -template -static __fi void __fastcall UNPACK_V4(u32 *dest, const T *data , int size) +// V4_5 unpacks do not support the MODE register, and act as mode==0 always. 
+template< uint idx, bool doMask > +static void __fastcall UNPACK_V4_5(u32 *dest, const u32* src) { - while (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset++; - size--; - } + u32 data = *src; - if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; -} - -template< bool doMask > -static __ri void __fastcall UNPACK_V4_5(u32 *dest, const u32 *data, int size) -{ - //As with S-#, this will always be a complete packet - writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); - writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); - writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); - writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); + writeXYZW(OFFSET_X, *(dest+0), ((data & 0x001f) << 3)); + writeXYZW(OFFSET_Y, *(dest+1), ((data & 0x03e0) >> 2)); + writeXYZW(OFFSET_Z, *(dest+2), ((data & 0x7c00) >> 7)); + writeXYZW(OFFSET_W, *(dest+3), ((data & 0x8000) >> 8)); } // ===================================================================================================== -template < bool doMask, int size, class T > -static void __fastcall fUNPACK_S(u32 *dest, const T *data) -{ - UNPACK_S( dest, data, size ); -} - -template -static void __fastcall fUNPACK_V2(u32 *dest, const T *data) -{ - UNPACK_V2( dest, data, size ); -} - -template -static void __fastcall fUNPACK_V3(u32 *dest, const T *data) -{ - UNPACK_V3( dest, data, size ); -} - -template -static void __fastcall fUNPACK_V4(u32 *dest, const T *data) -{ - UNPACK_V4( dest, data, size ); -} - -template< bool doMask > -static void __fastcall fUNPACK_V4_5(u32 *dest, const u32 *data) -{ - UNPACK_V4_5(dest, data, 0); // size is ignored. -} - // -------------------------------------------------------------------------------------- // Main table for function unpacking. // -------------------------------------------------------------------------------------- @@ -245,53 +132,51 @@ static void __fastcall fUNPACK_V4_5(u32 *dest, const u32 *data) // to be cast as. 
--air // -#define _upk (UNPACKFUNCTYPE) -#define _odd (UNPACKFUNCTYPE_ODD) -#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits) -#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits) -#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits) -#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits) +#define _upk (UNPACKFUNCTYPE) +#define _unpk(usn, bits) (UNPACKFUNCTYPE_##usn##bits) -// 32-bits versions are unsigned-only!! -#define UnpackFuncPair32( sizefac, vt, doMask ) \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, +#define UnpackFuncSet( vt, idx, mode, usn, doMask ) \ + (UNPACKFUNCTYPE)_unpk(u,32) UNPACK_##vt, \ + (UNPACKFUNCTYPE)_unpk(usn,16) UNPACK_##vt, \ + (UNPACKFUNCTYPE)_unpk(usn,8) UNPACK_##vt \ -#define UnpackFuncPair( sizefac, vt, bits, doMask ) \ - (UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt, \ - (UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt, +#define UnpackV4_5set(idx, doMask) \ + (UNPACKFUNCTYPE)_unpk(u,32) UNPACK_V4_5 \ -#define UnpackFuncSet( doMask ) \ - { UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \ - { UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \ - { UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \ - { NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \ - { UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \ - { UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \ - { UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \ - { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \ - { UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \ - { UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \ - { UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \ - { NULL, NULL, NULL, 
NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \ - { UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \ - { UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \ - { UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \ - { /* 0xF - V4-5 */ \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \ - 6, 2, 2, 4 }, +#define UnpackModeSet(idx, mode) \ + UnpackFuncSet( S, idx, mode, s, 0 ), NULL, \ + UnpackFuncSet( V2, idx, mode, s, 0 ), NULL, \ + UnpackFuncSet( V4, idx, mode, s, 0 ), NULL, \ + UnpackFuncSet( V4, idx, mode, s, 0 ), UnpackV4_5set(idx, 0), \ + \ + UnpackFuncSet( S, idx, mode, s, 1 ), NULL, \ + UnpackFuncSet( V2, idx, mode, s, 1 ), NULL, \ + UnpackFuncSet( V4, idx, mode, s, 1 ), NULL, \ + UnpackFuncSet( V4, idx, mode, s, 1 ), UnpackV4_5set(idx, 1), \ + \ + UnpackFuncSet( S, idx, mode, u, 0 ), NULL, \ + UnpackFuncSet( V2, idx, mode, u, 0 ), NULL, \ + UnpackFuncSet( V4, idx, mode, u, 0 ), NULL, \ + UnpackFuncSet( V4, idx, mode, u, 0 ), UnpackV4_5set(idx, 0), \ + \ + UnpackFuncSet( S, idx, mode, u, 1 ), NULL, \ + UnpackFuncSet( V2, idx, mode, u, 1 ), NULL, \ + UnpackFuncSet( V4, idx, mode, u, 1 ), NULL, \ + UnpackFuncSet( V4, idx, mode, u, 1 ), UnpackV4_5set(idx, 1) -const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] = +__aligned16 const UNPACKFUNCTYPE VIFfuncTable[2][3][4 * 4 * 2 * 2] = { - UnpackFuncSet( false ) - UnpackFuncSet( true ) + { + { UnpackModeSet(0,0) }, + { UnpackModeSet(0,1) }, + { UnpackModeSet(0,2) } + }, + + { + { UnpackModeSet(1,0) }, + { UnpackModeSet(1,1) }, + { UnpackModeSet(1,2) } + } }; //---------------------------------------------------------------------------- @@ -317,16 +202,23 @@ _vifT void vifUnpackSetup(const u32 *data) { if (vifNum == 0) vifNum = 256; vifXRegs.num = vifNum; + // Traditional-style way of calculating the gsize, based on VN/VL parameters. 
+ // Useful when VN/VL are known template params, but currently they are not so we use + // the LUT instead (for now). + //uint vl = vifX.cmd & 0x03; + //uint vn = (vifX.cmd >> 2) & 0x3; + //uint gsize = ((32 >> vl) * (vn+1)) / 8; + + const u8& gsize = nVifT[vifX.cmd & 0x0f]; + if (vifXRegs.cycle.wl <= vifXRegs.cycle.cl) { - if (!idx) vif0.tag.size = ((vifNum * VIFfuncTable[ vif0.cmd & 0xf ].gsize) + 3) >> 2; - else vif1.tag.size = ((vifNum * VIFfuncTable[ vif1.cmd & 0xf ].gsize) + 3) >> 2; + vifX.tag.size = ((vifNum * gsize) + 3) / 4; } else { int n = vifXRegs.cycle.cl * (vifNum / vifXRegs.cycle.wl) + _limit(vifNum % vifXRegs.cycle.wl, vifXRegs.cycle.cl); - if (!idx) vif0.tag.size = ((n * VIFfuncTable[ vif0.cmd & 0xf ].gsize) + 3) >> 2; - else vif1.tag.size = ((n * VIFfuncTable[ vif1.cmd & 0xf ].gsize) + 3) >> 2; + vifX.tag.size = ((n * gsize) + 3) >> 2; } u32 addr = vifXRegs.code; @@ -337,7 +229,6 @@ _vifT void vifUnpackSetup(const u32 *data) { vifX.cl = 0; vifX.tag.cmd = vifX.cmd; - vifXRegs.offset = 0; } template void vifUnpackSetup<0>(const u32 *data); diff --git a/pcsx2/Vif_Unpack.h b/pcsx2/Vif_Unpack.h index 825d00b9ca..a2b41b3ce5 100644 --- a/pcsx2/Vif_Unpack.h +++ b/pcsx2/Vif_Unpack.h @@ -15,44 +15,28 @@ #pragma once -typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, const u32 *data); -typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, const u32 *data, int size); -typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, const u32 *data, int size); +struct vifStruct; -#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, const u##bits *data); -#define create_unpack_odd_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, const u##bits *data, int size); -#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, const s##bits *data); -#define create_unpack_odd_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, const s##bits 
*data, int size); +typedef void (__fastcall *UNPACKFUNCTYPE)(void* dest, const void* src); + +#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_u##bits)(u32* dest, const u##bits* src); +#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_s##bits)(u32* dest, const s##bits* src); #define create_some_unpacks(bits) \ create_unpack_u_type(bits); \ - create_unpack_odd_u_type(bits); \ create_unpack_s_type(bits); \ - create_unpack_odd_s_type(bits); create_some_unpacks(32); create_some_unpacks(16); create_some_unpacks(8); -struct VIFUnpackFuncTable -{ - UNPACKFUNCTYPE funcU; - UNPACKFUNCTYPE funcS; +extern __aligned16 const u8 nVifT[16]; - UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed. - UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed. +// Array sub-dimension order: [vifidx] [mode] (VN * VL * USN * doMask) +extern __aligned16 const UNPACKFUNCTYPE VIFfuncTable[2][3][(4 * 4 * 2 * 2)]; - u8 bsize; // currently unused - u8 dsize; // byte size of one channel - u8 gsize; // size of data in bytes used for each write cycle - u8 qsize; // used for unpack parts, num of vectors that - // will be decompressed from data for 1 cycle -}; - -extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32]; - -extern int nVifUnpack (int idx, const u8 *data); +_vifT extern int nVifUnpack (const u8* data); extern void resetNewVif(int idx); template< int idx > -extern void vifUnpackSetup(const u32 *data); +extern void vifUnpackSetup(const u32* data); diff --git a/pcsx2/Vif_Unpack.inl b/pcsx2/Vif_Unpack.inl deleted file mode 100644 index 8462e63d73..0000000000 --- a/pcsx2/Vif_Unpack.inl +++ /dev/null @@ -1,159 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- 
- * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#pragma once - -// Old Vif Unpack Code -// Only here for testing/reference -template void VIFunpack(u32 *data, vifCode *v, u32 size) { - //if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data); - VURegs * VU; - u8 *cdata = (u8*)data; - u32 tempsize = 0; - const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000; - - if (VIFdmanum == 0) { - VU = &VU0; - vifRegs = &vif0Regs; - vif = &vif0; - } - else { - VU = &VU1; - vifRegs = &vif1Regs; - vif = &vif1; - } - - u32 *dest = (u32*)(VU->Mem + v->addr); - - const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] ); - UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS; - - size <<= 2; - - if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write - if (v->addr >= memlimit) { - DevCon.Warning("Overflown at the start"); - v->addr &= (memlimit - 1); - dest = (u32*)(VU->Mem + v->addr); - } - - size = std::min(size, vifRegs->num * ft.gsize); //size will always be the same or smaller - - tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) * - (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16); - - //Sanity Check (memory overflow) - if (tempsize > memlimit) { - if (((vifRegs->cycle.cl != vifRegs->cycle.wl) && - ((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) { - //It's a red herring, so ignore it! SSE unpacks will be much quicker. - DevCon.WriteLn("what!!!!!!!!!"); - //tempsize = 0; - tempsize = size; - size = 0; - } - else { - DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 
0x4000 : 0x1000); - tempsize = size; - size = 0; - } - } - else { - tempsize = size; - size = 0; - } - if (tempsize) { - int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; - size = 0; - int addrstart = v->addr; - //if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize"); - - //DbgCon.WriteLn("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr); - - while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) { - if(v->addr >= memlimit) { - DevCon.Warning("Mem limit overflow"); - v->addr &= (memlimit - 1); - dest = (u32*)(VU->Mem + v->addr); - } - - func(dest, (u32*)cdata); - cdata += ft.gsize; - tempsize -= ft.gsize; - - vifRegs->num--; - vif->cl++; - - if (vif->cl == vifRegs->cycle.wl) { - dest += incdest; - v->addr +=(incdest * 4); - vif->cl = 0; - } - else { - dest += 4; - v->addr += 16; - } - } - if (v->addr >= memlimit) { - v->addr &=(memlimit - 1); - dest = (u32*)(VU->Mem + v->addr); - } - v->addr = addrstart; - if(tempsize > 0) size = tempsize; - } - - if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have - VIF_LOG("warning, end with size = %d", size); - // unpack one qword - //v->addr += (size / ft.dsize) * 4; - (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize); - size = 0; - - //DbgCon.WriteLn("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr); - } - } - else { // filling write - if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P - if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num) - DevCon.Warning("Filling write warning! 
%x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl); - - DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr); - while (vifRegs->num > 0) { - if (vif->cl == vifRegs->cycle.wl) { - vif->cl = 0; - } - // unpack one qword - if (vif->cl < vifRegs->cycle.cl) { - if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; } - func(dest, (u32*)cdata); - cdata += ft.gsize; - size -= ft.gsize; - vif->cl++; - vifRegs->num--; - if (vif->cl == vifRegs->cycle.wl) { - vif->cl = 0; - } - } - else { - func(dest, (u32*)cdata); - v->addr += 16; - vifRegs->num--; - vif->cl++; - } - dest += 4; - if (vifRegs->num == 0) break; - } - } -} diff --git a/pcsx2/ps2/GIFpath.cpp b/pcsx2/ps2/GIFpath.cpp index 726cb9811d..028529166b 100644 --- a/pcsx2/ps2/GIFpath.cpp +++ b/pcsx2/ps2/GIFpath.cpp @@ -58,10 +58,15 @@ enum GIF_REG // are modified during the GIFtag unpacking process. struct GIFTAG { - u32 NLOOP : 15; - u32 EOP : 1; - u32 _dummy0 : 16; + u16 NLOOP : 15; + u16 EOP : 1; + + // Note that contents of the Dummy bits on real hardware is likely used to maintain state + // information regarding tag processing (namely nllop and curreg info, so to resume partial + // transfers later). 
+ u16 _dummy0 : 16; u32 _dummy1 : 14; + u32 PRE : 1; u32 PRIM : 11; u32 FLG : 2; diff --git a/pcsx2/ps2/Iop/IopHw_Internal.h b/pcsx2/ps2/Iop/IopHw_Internal.h index a6ced5bb75..2219f72544 100644 --- a/pcsx2/ps2/Iop/IopHw_Internal.h +++ b/pcsx2/ps2/Iop/IopHw_Internal.h @@ -203,7 +203,7 @@ template< typename T> static __ri void IopHwTraceLog( u32 addr, T val, bool mode ) { if (!IsDevBuild) return; - if (!EmuConfig.Trace.IOP.m_EnableRegisters) return; + if (!EmuConfig.Trace.Enabled || !EmuConfig.Trace.IOP.m_EnableAll || !EmuConfig.Trace.IOP.m_EnableRegisters) return; FastFormatAscii valStr; FastFormatAscii labelStr; diff --git a/pcsx2/ps2/eeHwTraceLog.inl b/pcsx2/ps2/eeHwTraceLog.inl index 388d40378a..feea08d081 100644 --- a/pcsx2/ps2/eeHwTraceLog.inl +++ b/pcsx2/ps2/eeHwTraceLog.inl @@ -243,7 +243,7 @@ template< typename T> static __ri void eeHwTraceLog( u32 addr, T val, bool mode ) { if (!IsDevBuild) return; - if (!EmuConfig.Trace.EE.m_EnableRegisters) return; + if (!EmuConfig.Trace.Enabled || !EmuConfig.Trace.EE.m_EnableAll || !EmuConfig.Trace.EE.m_EnableRegisters) return; FastFormatAscii valStr; FastFormatAscii labelStr; diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index d9c30caaa3..16ebf1cf41 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -831,10 +831,6 @@ RelativePath="..\..\Vif_Unpack.h" > - - diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 1c3644f2b8..6fc18f0bbd 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -39,7 +39,7 @@ using namespace x86Emitter; -extern u32 g_psxNextBranchCycle; +extern u32 g_iopNextEventCycle; extern void psxBREAK(); u32 g_psxMaxRecMem = 0; @@ -121,7 +121,7 @@ static DynGenFunc* iopExitRecompiledCode = NULL; static void recEventTest() { - _cpuBranchTest_Shared(); + _cpuEventTest_Shared(); } // parameters: @@ -876,28 +876,28 @@ static void recExecute() static __noinline s32 
recExecuteBlock( s32 eeCycles ) { - psxBreak = 0; - psxCycleEE = eeCycles; + iopBreak = 0; + iopCycleEE = eeCycles; // [TODO] recExecuteBlock could be replaced by a direct call to the iopEnterRecompiledCode() // (by assigning its address to the psxRec structure). But for that to happen, we need - // to move psxBreak/psxCycleEE update code to emitted assembly code. >_< --air + // to move iopBreak/iopCycleEE update code to emitted assembly code. >_< --air // Likely Disasm, as borrowed from MSVC: // Entry: // mov eax,dword ptr [esp+4] -// mov dword ptr [psxBreak (0E88DCCh)],0 -// mov dword ptr [psxCycleEE (832A84h)],eax +// mov dword ptr [iopBreak (0E88DCCh)],0 +// mov dword ptr [iopCycleEE (832A84h)],eax // Exit: -// mov ecx,dword ptr [psxBreak (0E88DCCh)] -// mov edx,dword ptr [psxCycleEE (832A84h)] +// mov ecx,dword ptr [iopBreak (0E88DCCh)] +// mov edx,dword ptr [iopCycleEE (832A84h)] // lea eax,[edx+ecx] iopEnterRecompiledCode(); - return psxBreak + psxCycleEE; + return iopBreak + iopCycleEE; } // Returns the offset to the next instruction after any cleared memory @@ -1021,19 +1021,19 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) { xMOV(eax, ptr32[&psxRegs.cycle]); xMOV(ecx, eax); - xMOV(edx, ptr32[&psxCycleEE]); + xMOV(edx, ptr32[&iopCycleEE]); xADD(edx, 7); xSHR(edx, 3); xADD(eax, edx); - xCMP(eax, ptr32[&g_psxNextBranchCycle]); - xCMOVNS(eax, ptr32[&g_psxNextBranchCycle]); + xCMP(eax, ptr32[&g_iopNextEventCycle]); + xCMOVNS(eax, ptr32[&g_iopNextEventCycle]); xMOV(ptr32[&psxRegs.cycle], eax); xSUB(eax, ecx); xSHL(eax, 3); - xSUB(ptr32[&psxCycleEE], eax); + xSUB(ptr32[&iopCycleEE], eax); xJLE(iopExitRecompiledCode); - xCALL(psxBranchTest); + xCALL(iopEventTest); if( newpc != 0xffffffff ) { @@ -1047,15 +1047,15 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) xADD(eax, blockCycles); xMOV(ptr32[&psxRegs.cycle], eax); // update cycles - // jump if psxCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE) - 
xSUB(ptr32[&psxCycleEE], blockCycles*8); + // jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE) + xSUB(ptr32[&iopCycleEE], blockCycles*8); xJLE(iopExitRecompiledCode); // check if an event is pending - xSUB(eax, ptr32[&g_psxNextBranchCycle]); + xSUB(eax, ptr32[&g_iopNextEventCycle]); xForwardJS nointerruptpending; - xCALL(psxBranchTest); + xCALL(iopEventTest); if( newpc != 0xffffffff ) { xCMP(ptr32[&psxRegs.pc], newpc); @@ -1098,7 +1098,7 @@ void rpsxSYSCALL() j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); - SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); + SUB32ItoM((uptr)&iopCycleEE, psxScaleBlockCycles()*8 ); JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); // jump target for skipping blockCycle updates @@ -1120,7 +1120,7 @@ void rpsxBREAK() CMP32ItoM((uptr)&psxRegs.pc, psxpc-4); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); - SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); + SUB32ItoM((uptr)&iopCycleEE, psxScaleBlockCycles()*8 ); JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); @@ -1373,7 +1373,7 @@ StartRecomp: else { ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); - SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); + SUB32ItoM((uptr)&iopCycleEE, psxScaleBlockCycles()*8 ); } if (willbranch3 || !psxbranch) { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index db0201f96b..2a4521b51c 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -316,7 +316,7 @@ void recBranchCall( void (*func)() ) // to the current cpu cycle. 
MOV32MtoR( EAX, (uptr)&cpuRegs.cycle ); - MOV32RtoM( (uptr)&g_nextBranchCycle, EAX ); + MOV32RtoM( (uptr)&g_nextEventCycle, EAX ); recCall(func); branch = 2; @@ -350,7 +350,7 @@ static DynGenFunc* ExitRecompiledCode = NULL; static void recEventTest() { - _cpuBranchTest_Shared(); + _cpuEventTest_Shared(); } // parameters: @@ -1111,11 +1111,11 @@ static void iBranchTest(u32 newpc) // Check the Event scheduler if our "cycle target" has been reached. // Equiv code to: // cpuRegs.cycle += blockcycles; - // if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); } + // if( cpuRegs.cycle > g_nextEventCycle ) { DoEvents(); } if (EmuConfig.Speedhacks.WaitLoop && s_nBlockFF && newpc == s_branchTo) { - xMOV(eax, ptr32[&g_nextBranchCycle]); + xMOV(eax, ptr32[&g_nextEventCycle]); xADD(ptr32[&cpuRegs.cycle], eeScaleBlockCycles()); xCMP(eax, ptr32[&cpuRegs.cycle]); xCMOVS(eax, ptr32[&cpuRegs.cycle]); @@ -1128,7 +1128,7 @@ static void iBranchTest(u32 newpc) xMOV(eax, ptr[&cpuRegs.cycle]); xADD(eax, eeScaleBlockCycles()); xMOV(ptr[&cpuRegs.cycle], eax); // update cycles - xSUB(eax, ptr[&g_nextBranchCycle]); + xSUB(eax, ptr[&g_nextEventCycle]); if (newpc == 0xffffffff) xJS( DispatcherReg ); diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index 930215de27..7f63bb6b91 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -32,12 +32,13 @@ typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src); #include "newVif_HashBucket.h" extern void mVUmergeRegs(const xRegisterSSE& dest, const xRegisterSSE& src, int xyzw, bool modXYZW = 0); -extern void _nVifUnpack (int idx, const u8 *data, u32 size, bool isFill); -extern void dVifUnpack (int idx, const u8 *data, u32 size, bool isFill); +extern void _nVifUnpack (int idx, const u8* data, uint mode, bool isFill); extern void dVifReset (int idx); extern void dVifClose (int idx); extern void VifUnpackSSE_Init(); +_vifT extern void dVifUnpack (const u8* data, bool isFill); + #define VUFT VIFUnpackFuncTable #define _v0 0 #define _v1 0x55 @@ 
-50,11 +51,6 @@ extern void VifUnpackSSE_Init(); #define xmmRow xmm6 #define xmmTemp xmm7 -#ifdef _MSC_VER -# pragma pack(1) -# pragma warning(disable:4996) // 'function': was declared deprecated -#endif - // nVifBlock - Ordered for Hashing; the 'num' field and the lower 6 bits of upkType are // used as the hash bucket selector. // @@ -62,30 +58,25 @@ struct __aligned16 nVifBlock { u8 num; // [00] Num Field u8 upkType; // [01] Unpack Type [usn*1:mask*1:upk*4] u8 mode; // [02] Mode Field - u8 scl; // [03] Start Cycle - u8 cl; // [04] CL Field - u8 wl; // [05] WL Field - u32 mask; // [06] Mask Field - u8 padding[2];// [10] through [11] + u8 cl; // [03] CL Field + u32 mask; // [04] Mask Field + u8 wl; // [08] WL Field + u8 padding[3];// [09] through [11] uptr startPtr; // [12] Start Ptr of RecGen Code -} __packed; // 16 bytes - -#ifdef _MSC_VER -# pragma pack() -#endif +}; // 16 bytes #define _hSize 0x4000 // [usn*1:mask*1:upk*4:num*8] hash... #define _cmpS (sizeof(nVifBlock) - (4)) #define _tParams nVifBlock, _hSize, _cmpS struct nVifStruct { - u32 idx; // VIF0 or VIF1 - vifStruct* vif; // Vif Struct ptr - VIFregisters* vifRegs; // Vif Regs ptr - VURegs* VU; // VU Regs ptr - u32 vuMemLimit; // Use for fast AND + + // Buffer for partial transfers (should always be first to ensure alignment) + // Maximum buffer size is 256 (vifRegs.Num max range) * 16 (quadword) + __aligned16 u8 buffer[256*16]; u32 bSize; // Size of 'buffer' u32 bPtr; - u8 buffer[_1mb]; // Buffer for partial transfers + + u32 idx; // VIF0 or VIF1 u8* recPtr; // Cur Pos to recompile to u8* recEnd; // 'Safe' End of Rec Cache BlockBuffer* vifCache; // Block Buffer @@ -103,9 +94,7 @@ struct nVifStruct { }; extern __aligned16 nVifStruct nVif[2]; -extern __aligned16 const u8 nVifT[16]; extern __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle] extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector] -static const bool useOldUnpack = 0; // Use code in 
newVif_OldUnpack.inl static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl diff --git a/pcsx2/x86/newVif_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp index eeb73db05c..8325341da5 100644 --- a/pcsx2/x86/newVif_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -58,6 +58,7 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo usn = (vB.upkType>>5) & 1; doMask = (vB.upkType>>4) & 1; doMode = vB.mode & 3; + vCL = 0; } #define makeMergeMask(x) { \ @@ -65,15 +66,15 @@ VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlo } __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const { + const vifStruct& vif = v.idx ? vif1 : vif0; + u32 m0 = vB.mask; u32 m1 = m0 & 0xaaaaaaaa; u32 m2 =(~m1>>1) & m0; u32 m3 = (m1>>1) & ~m0; - u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0; - u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0; - if((m2&&(doMask||isFill))||doMode) { xMOVAPS(xmmRow, ptr32[row]); } + if((m2&&(doMask||isFill))||doMode) { xMOVAPS(xmmRow, ptr128[&vif.MaskRow]); } if (m3&&(doMask||isFill)) { - xMOVAPS(xmmCol0, ptr32[col]); + xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]); if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); @@ -95,8 +96,8 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { makeMergeMask(m3); makeMergeMask(m4); if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect - if (doMask&&m2) { mergeVectors(regX, xmmRow, t, m2); } // Merge Row - if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), t, m3); } // Merge Col + if (doMask&&m2) { mergeVectors(regX, xmmRow, t, m2); } // Merge MaskRow + if (doMask&&m3) { mergeVectors(regX, xRegisterSSE(xmmCol0.Id+cc), t, m3); } // Merge MaskCol if (doMask&&m4) { mergeVectors(regX, xmmTemp, t, m4); } // Merge Write Protect if (doMode) { u32 m5 = (~m1>>1) & 
~m0; @@ -117,8 +118,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { } void VifUnpackSSE_Dynarec::writeBackRow() const { - u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0; - xMOVAPS(ptr32[row], xmmRow); + xMOVAPS(ptr128[&((v.idx ? vif1 : vif0).MaskRow)], xmmRow); DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]"); // ToDo: Do we need to write back to vifregs.rX too!? :/ } @@ -139,15 +139,16 @@ static void ShiftDisplacementWindow( xAddressVoid& addr, const xRegister32& modR } void VifUnpackSSE_Dynarec::CompileRoutine() { - const int upkNum = v.vif->cmd & 0xf; + const int upkNum = vB.upkType & 0xf; const u8& vift = nVifT[upkNum]; const int cycleSize = isFill ? vB.cl : vB.wl; const int blockSize = isFill ? vB.wl : vB.cl; const int skipSize = blockSize - cycleSize; + + uint vNum = vB.num ? vB.num : 256; + doMode = (upkNum == 0xf) ? 0 : doMode; // V4_5 has no mode feature. - int vNum = v.vifRegs->num; - vCL = v.vif->cl; - doMode = upkNum == 0xf ? 0 : doMode; + pxAssume(vCL == 0); // Value passed determines # of col regs we need to load SetMasks(isFill ? blockSize : cycleSize); @@ -189,14 +190,17 @@ void VifUnpackSSE_Dynarec::CompileRoutine() { } if (doMode==2) writeBackRow(); - xMOV(ptr32[&v.vif->cl], vCL); - xMOV(ptr32[&v.vifRegs->num], vNum); xRET(); } -static __noinline u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) { - u8* startmem = v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit); - u8* endmem = v.VU->Mem + (v.vuMemLimit+0x10); +_vifT static __fi u8* dVifsetVUptr(uint cl, uint wl, bool isFill) { + vifStruct& vif = GetVifX; + VIFregisters& vifRegs = vifXRegs; + const VURegs& VU = vuRegs[idx]; + const uint vuMemLimit = idx ? 
0x4000 : 0x1000; + + u8* startmem = VU.Mem + (vif.tag.addr & (vuMemLimit-0x10)); + u8* endmem = VU.Mem + vuMemLimit; uint length = _vBlock.num * 16; if (!isFill) { @@ -204,15 +208,15 @@ static __noinline u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isF // shouldn't count as wrapped data. Otherwise, a trailing skip can cause the emu to drop back // to the interpreter. -- Refraction (test with MGS3) - int skipSize = (cl - wl) * 16; - int blocks = _vBlock.num / wl; + uint skipSize = (cl - wl) * 16; + uint blocks = _vBlock.num / wl; length += (blocks-1) * skipSize; } if ( (startmem+length) <= endmem ) { return startmem; } - //Console.WriteLn("nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", v.idx, v.vif->tag.addr, v.vif->tag.addr + (_vBlock.num * 16), _vBlock.num, wl, cl); + //Console.WriteLn("nVif%x - VU Mem Ptr Overflow; falling back to interpreter. Start = %x End = %x num = %x, wl = %x, cl = %x", v.idx, vif.tag.addr, vif.tag.addr + (_vBlock.num * 16), _vBlock.num, wl, cl); return NULL; // Fall Back to Interpreters which have wrap-around logic } @@ -227,46 +231,52 @@ static __fi void dVifRecLimit(int idx) { } } -// Gcc complains about recursive functions being inlined. -void dVifUnpack(int idx, const u8 *data, u32 size, bool isFill) { - +_vifT static __fi bool dVifExecuteUnpack(const u8* data, bool isFill) +{ const nVifStruct& v = nVif[idx]; - const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5); - const int doMask = v.vif->cmd & 0x10; - const int cycle_cl = v.vifRegs->cycle.cl; - const int cycle_wl = v.vifRegs->cycle.wl; - const int blockSize = isFill ? 
cycle_wl : cycle_cl; - - if (v.vif->cl >= blockSize) v.vif->cl = 0; - - _vBlock.upkType = upkType; - _vBlock.num = (u8&)v.vifRegs->num; - _vBlock.mode = (u8&)v.vifRegs->mode; - _vBlock.scl = v.vif->cl; - _vBlock.cl = cycle_cl; - _vBlock.wl = cycle_wl; - - // Zero out the mask parameter if it's unused -- games leave random junk - // values here which cause false recblock cache misses. - _vBlock.mask = doMask ? v.vifRegs->mask : 0; + VIFregisters& vifRegs = vifXRegs; if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { - if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) { + if (u8* dest = dVifsetVUptr(vifRegs.cycle.cl, vifRegs.cycle.wl, isFill)) { //DevCon.WriteLn("Running Recompiled Block!"); ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data); } else { //DevCon.WriteLn("Running Interpreter Block"); - _nVifUnpack(idx, data, size, isFill); + _nVifUnpack(idx, data, vifRegs.mode, isFill); } - return; + return true; } + return false; +} + +_vifT __fi void dVifUnpack(const u8* data, bool isFill) { + + const nVifStruct& v = nVif[idx]; + vifStruct& vif = GetVifX; + VIFregisters& vifRegs = vifXRegs; + + const u8 upkType = (vif.cmd & 0x1f) | (vif.usn << 5); + const int doMask = (vif.cmd & 0x10); + + _vBlock.upkType = upkType; + _vBlock.num = (u8&)vifRegs.num; + _vBlock.mode = (u8&)vifRegs.mode; + _vBlock.cl = vifRegs.cycle.cl; + _vBlock.wl = vifRegs.cycle.wl; + + // Zero out the mask parameter if it's unused -- games leave random junk + // values here which cause false recblock cache misses. + _vBlock.mask = doMask ? vifRegs.mask : 0; + //DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++); //DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]", // _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode, // doMask >> 4, doMask ? 
wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored" //); + if (dVifExecuteUnpack(data, isFill)) return; + xSetPtr(v.recPtr); _vBlock.startPtr = (uptr)xGetAlignedCallTarget(); v.vifBlocks->add(_vBlock); @@ -279,5 +289,8 @@ void dVifUnpack(int idx, const u8 *data, u32 size, bool isFill) { // Run the block we just compiled. Various conditions may force us to still use // the interpreter unpacker though, so a recursive call is the safest way here... - dVifUnpack(idx, data, size, isFill); + dVifExecuteUnpack(data, isFill); } + +template void dVifUnpack<0>(const u8* data, bool isFill); +template void dVifUnpack<1>(const u8* data, bool isFill); diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index 882c579575..0c692292c9 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -21,12 +21,20 @@ #include "Common.h" #include "Vif_Dma.h" #include "newVif.h" -#include "Vif_Unpack.inl" __aligned16 nVifStruct nVif[2]; -__aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle] -__aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector] +// Interpreter-style SSE unpacks. Array layout matches the interpreter C unpacks. +// ([USN][Masking][Unpack Type]) [curCycle] +__aligned16 nVifCall nVifUpk[(2*2*16) *4]; + +// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks +// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly. +// [MaskNumber][CycleNumber][Vector] +__aligned16 u32 nVifMask[3][4][4] = {0}; + +// Number of bytes of data in the source stream needed for each vector. 
+// [equivalent to ((32 >> VL) * (VN+1)) / 8] __aligned16 const u8 nVifT[16] = { 4, // S-32 2, // S-16 @@ -47,26 +55,18 @@ __aligned16 const u8 nVifT[16] = { }; // ---------------------------------------------------------------------------- -template< int idx, bool doMode, bool isFill, bool singleUnpack > -__ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size); +template< int idx, bool doMode, bool isFill > +__ri void __fastcall _nVifUnpackLoop(const u8* data); -typedef void __fastcall FnType_VifUnpackLoop(const u8 *data, u32 size); +typedef void __fastcall FnType_VifUnpackLoop(const u8* data); typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop; // Unpacks Until 'Num' is 0 static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = { - {{ _nVifUnpackLoop<0,0,0,0>, _nVifUnpackLoop<0,0,1,0> }, - { _nVifUnpackLoop<0,1,0,0>, _nVifUnpackLoop<0,1,1,0> },}, - {{ _nVifUnpackLoop<1,0,0,0>, _nVifUnpackLoop<1,0,1,0> }, - { _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },}, -}; - -// Unpacks until 1 normal write cycle unpack has been written to VU mem -static const __aligned16 Fnptr_VifUnpackLoop UnpackSingleTable[2][2][2] = { - {{ _nVifUnpackLoop<0,0,0,1>, _nVifUnpackLoop<0,0,1,1> }, - { _nVifUnpackLoop<0,1,0,1>, _nVifUnpackLoop<0,1,1,1> },}, - {{ _nVifUnpackLoop<1,0,0,1>, _nVifUnpackLoop<1,0,1,1> }, - { _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },}, + {{ _nVifUnpackLoop<0,0,0>, _nVifUnpackLoop<0,0,1> }, + { _nVifUnpackLoop<0,1,0>, _nVifUnpackLoop<0,1,1> },}, + {{ _nVifUnpackLoop<1,0,0>, _nVifUnpackLoop<1,0,1> }, + { _nVifUnpackLoop<1,1,0>, _nVifUnpackLoop<1,1,1> },}, }; // ---------------------------------------------------------------------------- @@ -76,10 +76,6 @@ void resetNewVif(int idx) // changed for some reason. nVif[idx].idx = idx; - nVif[idx].VU = idx ? &VU1 : &VU0; - nVif[idx].vuMemLimit = idx ? 
0x3ff0 : 0xff0; - nVif[idx].vif = &GetVifX; - nVif[idx].vifRegs = &vifXRegs; nVif[idx].bSize = 0; memzero(nVif[idx].buffer); @@ -90,65 +86,75 @@ void closeNewVif(int idx) { if (newVifDynaRec) dVifClose(idx); } -static __fi u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) { - return (u8*)(vuMemBase + ( offset & (vuidx ? 0x3ff0 : 0xff0) )); +static __fi u8* getVUptr(uint idx, int offset) { + return (u8*)(vuRegs[idx].Mem + ( offset & (idx ? 0x3ff0 : 0xff0) )); } -static __fi void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) { - pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check - ptr += amount; - vif->tag.addr += amount; - int diff = ptr - (vuMemBase + (vuidx ? 0x4000 : 0x1000)); - if (diff >= 0) { - ptr = (u8*)(vuMemBase + diff); - } -} -static __fi void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) { - pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check - ptr += 16; - vif->tag.addr += 16; - if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) ) { - ptr -= (vuidx ? 0x4000 : 0x1000); - } -} - -int nVifUnpack(int idx, const u8* data) { +_vifT int nVifUnpack(const u8* data) { nVifStruct& v = nVif[idx]; - vif = v.vif; - vifRegs = v.vifRegs; + vifStruct& vif = GetVifX; + VIFregisters& vifRegs = vifXRegs; - const int ret = aMin(vif->vifpacketsize, vif->tag.size); - const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl); + const uint ret = aMin(vif.vifpacketsize, vif.tag.size); + const bool isFill = (vifRegs.cycle.cl < vifRegs.cycle.wl); s32 size = ret << 2; - if (ret == v.vif->tag.size) { // Full Transfer + if (ret == vif.tag.size) { // Full Transfer if (v.bSize) { // Last transfer was partial memcpy_fast(&v.buffer[v.bSize], data, size); - v.bSize += size; - data = v.buffer; - size = v.bSize; + v.bSize += size; + data = v.buffer; + + vif.cl = 0; + vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM form the original VIFcode input. 
+ if (!vifRegs.num) vifRegs.num = 256; } - if (size > 0 || isFill) { - if (newVifDynaRec) dVifUnpack(idx, data, size, isFill); - else _nVifUnpack(idx, data, size, isFill); - } - vif->tag.size = 0; - vif->cmd = 0; - v.bSize = 0; + + if (newVifDynaRec) dVifUnpack(data, isFill); + else _nVifUnpack(idx, data, vifRegs.mode, isFill); + + vif.tag.size = 0; + vif.cmd = 0; + vifRegs.num = 0; + v.bSize = 0; } else { // Partial Transfer memcpy_fast(&v.buffer[v.bSize], data, size); - v.bSize += size; - vif->tag.size -= ret; + v.bSize += size; + vif.tag.size -= ret; + + const u8& vSize = nVifT[vif.cmd & 0x0f]; + + // We need to provide accurate accounting of the NUM register, in case games decided + // to read back from it mid-transfer. Since so few games actually use partial transfers + // of VIF unpacks, this code should not be any bottleneck. + + while (size >= vSize) { + --vifRegs.num; + ++vif.cl; + + if (isFill) { + if (vif.cl < vifRegs.cycle.cl) size -= vSize; + else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0; + } + else + { + size -= vSize; + if (vif.cl >= vifRegs.cycle.wl) vif.cl = 0; + } + } } return ret; } -static void setMasks(int idx, const VIFregisters& v) { - u32* row = idx ? g_vifmask.Row1 : g_vifmask.Row0; - u32* col = idx ? g_vifmask.Col1 : g_vifmask.Col0; +template int nVifUnpack<0>(const u8* data); +template int nVifUnpack<1>(const u8* data); + +// This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks +// and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly. +static void setMasks(const vifStruct& vif, const VIFregisters& v) { for (int i = 0; i < 16; i++) { int m = (v.mask >> (i*2)) & 3; switch (m) { @@ -157,15 +163,15 @@ static void setMasks(int idx, const VIFregisters& v) { nVifMask[1][i/4][i%4] = 0; nVifMask[2][i/4][i%4] = 0; break; - case 1: // Row + case 1: // MaskRow nVifMask[0][i/4][i%4] = 0; nVifMask[1][i/4][i%4] = 0; - nVifMask[2][i/4][i%4] = newVifDynaRec ? 
row[i%4] : ((u32*)&v.r0)[(i%4)*4]; + nVifMask[2][i/4][i%4] = vif.MaskRow._u32[i%4]; break; - case 2: // Col + case 2: // MaskCol nVifMask[0][i/4][i%4] = 0; nVifMask[1][i/4][i%4] = 0; - nVifMask[2][i/4][i%4] = newVifDynaRec ? col[i/4] : ((u32*)&v.c0)[(i/4)*4]; + nVifMask[2][i/4][i%4] = vif.MaskCol._u32[i/4]; break; case 3: // Write Protect nVifMask[0][i/4][i%4] = 0; @@ -184,80 +190,81 @@ static void setMasks(int idx, const VIFregisters& v) { // a "win" to move code outside the loop, like normally in most other loop scenarios. // // The biggest bottleneck of the current code is the call/ret needed to invoke the SSE -// unpackers. A better option is to generate the entire vifRegs->num loop code as part +// unpackers. A better option is to generate the entire vifRegs.num loop code as part // of the SSE template, and inline the SSE code into the heart of it. This both avoids // the call/ret and opens the door for resolving some register dependency chains in the // current emitted functions. (this is what zero's SSE does to get it's final bit of // speed advantage over the new vif). --air // -// As a secondary optimization to above, special handlers could be generated for the -// cycleSize==1 case, which is used frequently enough, and results in enough code -// elimination that it would probably be a win in most cases (and for sure in many -// "slow" games that need it most). --air +// The BEST optimizatin strategy here is to use data available to us from the UNPACK dispatch +// -- namely the unpack type and mask flag -- in combination mode and usn values -- to +// generate ~600 special versions of this function. But since it's an interpreter, who gives +// a crap? Really? :p +// -template< int idx, bool doMode, bool isFill, bool singleUnpack > -__ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size) { +// size - size of the packet fragment incoming from DMAC. 
+template< int idx, bool doMode, bool isFill > +__ri void __fastcall _nVifUnpackLoop(const u8* data) { - const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl; - const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl; - const int skipSize = blockSize - cycleSize; - //DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs->num, upkNum, vif->cl, blockSize, skipSize); + vifStruct& vif = GetVifX; + VIFregisters& vifRegs = vifXRegs; - if (vif->cmd & 0x10) setMasks(idx, *vifRegs); + // skipSize used for skipping writes only + const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16; - const int usn = !!(vif->usn); - const int upkNum = vif->cmd & 0x1f; - //const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished. + //DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize); + + if (!doMode && (vif.cmd & 0x10)) setMasks(vif, vifRegs); + + const int usn = !!vif.usn; + const int upkNum = vif.cmd & 0x1f; + const u8& vSize = nVifT[upkNum & 0x0f]; + //uint vl = vif.cmd & 0x03; + //uint vn = (vif.cmd >> 2) & 0x3; + //uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ]; - const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum]; - UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS; + const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ]; - const u8* vuMemBase = (idx ? 
VU1 : VU0).Mem; - u8* dest = setVUptr(idx, vuMemBase, vif->tag.addr); - if (vif->cl >= blockSize) vif->cl = 0; + pxAssume (vif.cl == 0); + pxAssume (vifRegs.cycle.wl > 0); - while (vifRegs->num) { - if (vif->cl < cycleSize) { - if (size < ft.gsize) break; - if (doMode) { - //DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum); - func((u32*)dest, (u32*)data); - } - else { - //DevCon.WriteLn("SSE Unpack!"); - fnbase[aMin(vif->cl, 3)](dest, data); - } - data += ft.gsize; - size -= ft.gsize; - vifRegs->num--; - incVUptrBy16(idx, dest, vuMemBase); - if (++vif->cl == blockSize) vif->cl = 0; - if (singleUnpack) return; - } - else if (isFill) { - //DevCon.WriteLn("isFill!"); - func((u32*)dest, (u32*)data); - vifRegs->num--; - incVUptrBy16(idx, dest, vuMemBase); - if (++vif->cl == blockSize) vif->cl = 0; + do { + u8* dest = getVUptr(idx, vif.tag.addr); + + if (doMode) { + //if (1) { + ft(dest, data); } else { - incVUptr(idx, dest, vuMemBase, 16 * skipSize); - vif->cl = 0; + //DevCon.WriteLn("SSE Unpack!"); + uint cl3 = aMin(vif.cl,3); + fnbase[cl3](dest, data); } - } + + vif.tag.addr += 16; + --vifRegs.num; + ++vif.cl; + + if (isFill) { + //DevCon.WriteLn("isFill!"); + if (vif.cl < vifRegs.cycle.cl) data += vSize; + else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0; + } + else + { + data += vSize; + + if (vif.cl >= vifRegs.cycle.wl) { + vif.tag.addr += skipSize; + vif.cl = 0; + } + } + } while (vifRegs.num); } -__fi void _nVifUnpack(int idx, const u8 *data, u32 size, bool isFill) { +__fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill) { - if (useOldUnpack) { - if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2); - else VIFunpack<1>((u32*)data, &vif1.tag, size>>2); - return; - } - - const bool doMode = !!vifRegs->mode; - UnpackLoopTable[idx][doMode][isFill]( data, size ); + UnpackLoopTable[idx][!!mode][isFill]( data ); } diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index 465cd4e8a5..010913aa97 100644 --- 
a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -36,31 +36,6 @@ void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xy } } -// Loads Row/Col Data from vifRegs instead of g_vifmask -// Useful for testing vifReg and g_vifmask inconsistency. -void loadRowCol(nVifStruct& v) { - xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]); - xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]); - xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]); - xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]); - - xPSHUF.D(xmm0, xmm0, _v0); - xPSHUF.D(xmm1, xmm1, _v0); - xPSHUF.D(xmm2, xmm2, _v0); - xPSHUF.D(xmm6, xmm6, _v0); - mVUmergeRegs(xmm6, xmm0, 8); - mVUmergeRegs(xmm6, xmm1, 4); - mVUmergeRegs(xmm6, xmm2, 2); - xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]); - xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]); - xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]); - xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]); - xPSHUF.D(xmm2, xmm2, _v0); - xPSHUF.D(xmm3, xmm3, _v0); - xPSHUF.D(xmm4, xmm4, _v0); - xPSHUF.D(xmm5, xmm5, _v0); -} - // ===================================================================================================== // VifUnpackSSE_Base Section // ===================================================================================================== diff --git a/pcsx2/x86/newVif_UnpackSSE.h b/pcsx2/x86/newVif_UnpackSSE.h index 0dc74a19e7..231a080b35 100644 --- a/pcsx2/x86/newVif_UnpackSSE.h +++ b/pcsx2/x86/newVif_UnpackSSE.h @@ -25,7 +25,6 @@ using namespace x86Emitter; extern void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw); -extern void loadRowCol(nVifStruct& v); // -------------------------------------------------------------------------------------- // VifUnpackSSE_Base diff --git a/plugins/zzogl-pg/opengl/CMakeLists.txt b/plugins/zzogl-pg/opengl/CMakeLists.txt index 872dd6c8db..3d9c4e913b 100644 --- a/plugins/zzogl-pg/opengl/CMakeLists.txt +++ b/plugins/zzogl-pg/opengl/CMakeLists.txt @@ -14,6 +14,7 @@ set(CommonFlags -DZEROGS_SSE2 -fno-regmove -fno-strict-aliasing + 
-Wstrict-aliasing # Allow to track strict aliasing issue. -Wno-format -Wno-unused-parameter -Wno-unused-value @@ -94,6 +95,7 @@ set(zzoglHeaders zerogsmath.h zpipe.h ZZoglCRTC.h + ZZoglShaders.h ZZGl.h ZZLog.h) @@ -155,6 +157,9 @@ target_link_libraries(${Output} ${OPENGL_LIBRARIES}) # link target with X11 target_link_libraries(${Output} ${X11_LIBRARIES}) +# link target with jpeg +target_link_libraries(${Output} ${JPEG_LIBRARIES}) + # User flags options if(NOT USER_CMAKE_LD_FLAGS STREQUAL "") target_link_libraries(${Output} "${USER_CMAKE_LD_FLAGS}") diff --git a/plugins/zzogl-pg/opengl/GLWin32.cpp b/plugins/zzogl-pg/opengl/GLWin32.cpp index e154de0b3d..da64f16fe8 100644 --- a/plugins/zzogl-pg/opengl/GLWin32.cpp +++ b/plugins/zzogl-pg/opengl/GLWin32.cpp @@ -301,6 +301,7 @@ void GLWindow::SwapGLBuffers() { static u32 lastswaptime = 0; SwapBuffers(hDC); + //glClear(GL_COLOR_BUFFER_BIT); lastswaptime = timeGetTime(); } diff --git a/plugins/zzogl-pg/opengl/GLWinX11.cpp b/plugins/zzogl-pg/opengl/GLWinX11.cpp index 0477845251..39d8108ae5 100644 --- a/plugins/zzogl-pg/opengl/GLWinX11.cpp +++ b/plugins/zzogl-pg/opengl/GLWinX11.cpp @@ -227,7 +227,8 @@ void GLWindow::ToggleFullscreen() XUnlockDisplay(glDisplay); // Apply the change - XSync(glDisplay, False); + // Note: Xsync is not enough. All pending event must be flush. + XFlush(glDisplay); // update info structure GetWindowSize(); @@ -298,6 +299,7 @@ bool GLWindow::DisplayWindow(int _width, int _height) void GLWindow::SwapGLBuffers() { glXSwapBuffers(glDisplay, glWindow); + //glClear(GL_COLOR_BUFFER_BIT); } void GLWindow::SetTitle(char *strtitle) diff --git a/plugins/zzogl-pg/opengl/GS.h b/plugins/zzogl-pg/opengl/GS.h index 1ecc5c4bba..75a7a82aa6 100644 --- a/plugins/zzogl-pg/opengl/GS.h +++ b/plugins/zzogl-pg/opengl/GS.h @@ -117,6 +117,7 @@ enum PSM_value // Check target bit mode. PSMCT32 and 32Z return 0, 24 and 24Z - 1 // 16, 16S, 16Z, 16SZ -- 2, PSMT8 and 8H - 3, PSMT4, 4HL, 4HH -- 4. 
+// This code returns the same value on Z-textures, so texel storage mode is (BITMODE and !ISZTEX). inline int PSMT_BITMODE(int psm) {return (psm & 0x7);} inline int PSMT_BITS_NUM(int psm) @@ -168,6 +169,11 @@ inline bool PSMT_IS16Z(int psm) {return ((psm & 0x32) == 0x32);} // I'll have to look closer at it, because it'd seem like it'd return true for 24 bits. inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);} +// When color format is RGB24 (PSMCT24) or RGBA16 (PSMCT16 & 16S) alpha value expanded, based on +// TEXA register and AEM status. +inline int PSMT_ALPHAEXP(int psm) {return (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S);} + + // This function updates the 6th and 5th bit of psm // 00 or 11 -> 00 ; 01 -> 10 ; 10 -> 01 inline int Switch_Top_Bytes (int X) { @@ -177,6 +183,19 @@ inline int Switch_Top_Bytes (int X) { return (X ^ 0x30); } +// How many pixel stored in 1 word. +// PSMT8 has 4 pixels per 32bit, PSMT4 has 8. All 16-bit textures are 2 pixel per bit. And all others are 1 pixel in texture. +inline int PIXELS_PER_WORD(int psm) +{ + if (psm == PSMT8) + return 4; + if (psm == PSMT4) + return 8; + if (PSMT_IS16BIT(psm)) + return 2; + return 1; +} + // Some storage formats could share the same memory block (2 textures in 1 format). This include following combinations: // PSMT24(24Z) with either 8H, 4HL, 4HH and PSMT4HL with PSMT4HH. 
// We use slightly different versions of this function on comparison with GSDX, Storage format XOR 0x30 made Z-textures @@ -488,6 +507,10 @@ typedef struct GIFRegDIMX dimx; GSMemory mem; GSClut clut_buffer; + int primNext(int inc = 1) + { + return ((primIndex + inc) % ARRAY_SIZE(gsvertex)); + } void setRGBA(u32 r, u32 g, u32 b, u32 a) { @@ -504,7 +527,7 @@ typedef struct vertexregs.z = z; vertexregs.f = f; gsvertex[primIndex] = vertexregs; - primIndex = (primIndex + 1) % ARRAY_SIZE(gsvertex); + primIndex = primNext(); } void add_vertex(u16 x, u16 y, u32 z) @@ -513,7 +536,7 @@ typedef struct vertexregs.y = y; vertexregs.z = z; gsvertex[primIndex] = vertexregs; - primIndex = (primIndex + 1) % ARRAY_SIZE(gsvertex); + primIndex = primNext(); } } GSinternal; @@ -589,14 +612,17 @@ inline float Clamp(float fx, float fmin, float fmax) return fx > fmax ? fmax : fx; } -// PSMT16, 16S have shorter color per pixel, also cluted textures with half storage. -inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) -{ - if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1)) - return true; - else - return false; -} +// Get pixel storage format from tex0. Clutted textures store pixels in cpsm format. +inline int PIXEL_STORAGE_FORMAT(const tex0Info& tex) { + if (PSMT_ISCLUT(tex.psm)) + return tex.cpsm; + else + return tex.psm; + } + +// If pixel storage format not PSMCT24 ot PSMCT32, then it is 16-bit. 
+// Z-textures have 0x30 upper bits, so we eliminate them by &&(~0x30) +inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) { return ((PIXEL_STORAGE_FORMAT(tex0) & (~0x30)) > 1); } //--------------------------- Inlines for bitwise ops //--------------------------- textures diff --git a/plugins/zzogl-pg/opengl/GSmain.cpp b/plugins/zzogl-pg/opengl/GSmain.cpp index 7e815ba685..75623d911a 100644 --- a/plugins/zzogl-pg/opengl/GSmain.cpp +++ b/plugins/zzogl-pg/opengl/GSmain.cpp @@ -36,7 +36,8 @@ using namespace std; #include "zerogs.h" #include "targets.h" -#include "ZeroGSShaders/zerogsshaders.h" +#include "ZZoglShaders.h" +#include "ZZoglFlushHack.h" #include "ZZoglFlushHack.h" #ifdef _MSC_VER @@ -64,12 +65,11 @@ bool SaveStateExists = true; // We could not know save slot status before first const char* SaveStateFile = NULL; // Name of SaveFile for access check. extern const char* s_aa[5]; -extern const char* s_naa[3]; extern const char* pbilinear[]; // statistics u32 g_nGenVars = 0, g_nTexVars = 0, g_nAlphaVars = 0, g_nResolve = 0; -#define VER 1 +#define VER 2 const unsigned char zgsversion = PS2E_GS_VERSION; unsigned char zgsrevision = 0; // revision and build gives plugin version unsigned char zgsbuild = VER; @@ -85,7 +85,7 @@ char *libraryName = "ZZ Ogl PG "; extern int g_nPixelShaderVer, g_nFrameRender, g_nFramesSkipped; -extern void ProcessMessages(); +extern void ProcessEvents(); extern void WriteAA(); extern void WriteBilinear(); @@ -304,7 +304,7 @@ s32 CALLBACK GSinit() { FUNCLOG - if (ZZLog::Open() == false) return -1; + ZZLog::Open(); ZZLog::WriteLn("Calling GSinit."); WriteTempRegs(); @@ -477,8 +477,7 @@ static __forceinline void SetGSTitle() SaveStateExists = true; sprintf(strtitle, "ZZ Open GL 0.%d.%d | %.1f fps | %s%s%s savestate %d%s | shaders %s | (%.1f)", zgsbuild, zgsminor, fFPS, - g_pInterlace[conf.interlace], g_pBilinear[conf.bilinear], - (conf.aa >= conf.negaa) ? (conf.aa ? s_aa[conf.aa - conf.negaa] : "") : (conf.negaa ? 
s_naa[conf.negaa - conf.aa] : ""), + g_pInterlace[conf.interlace], g_pBilinear[conf.bilinear], (conf.aa ? s_aa[conf.aa] : ""), CurrentSavestate, (SaveStateExists ? "" : "*"), g_pShaders[g_nPixelShaderVer], (ppf&0xfffff) / (float)UPDATE_FRAMES); @@ -515,7 +514,7 @@ void CALLBACK GSvsync(int interlace) // !interlace? Hmmm... Fixme. ZeroGS::RenderCRTC(!interlace); - ProcessMessages(); + ProcessEvents(); if (--nToNextUpdate <= 0) { diff --git a/plugins/zzogl-pg/opengl/Linux/Linux.cpp b/plugins/zzogl-pg/opengl/Linux/Linux.cpp index a19f81bc00..d32819e42b 100644 --- a/plugins/zzogl-pg/opengl/Linux/Linux.cpp +++ b/plugins/zzogl-pg/opengl/Linux/Linux.cpp @@ -390,8 +390,6 @@ void DisplayDialog() if (gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box)) != -1) conf.aa = gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box)); - conf.negaa = 0; - conf.log = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(log_check)); conf.bilinear = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(bilinear_check)); fake_options.widescreen = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widescreen_check)); diff --git a/plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp b/plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp index cecbc5e79a..ac7fa4dad0 100644 --- a/plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp +++ b/plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp @@ -16,6 +16,7 @@ + @@ -87,7 +88,6 @@ - @@ -154,9 +154,9 @@ + - + + diff --git a/plugins/zzogl-pg/opengl/ZZGl.h b/plugins/zzogl-pg/opengl/ZZGl.h index b6ca7c4a42..3ebd2e8da3 100644 --- a/plugins/zzogl-pg/opengl/ZZGl.h +++ b/plugins/zzogl-pg/opengl/ZZGl.h @@ -46,12 +46,6 @@ inline void* wglGetProcAddress(const char* x) #endif -#include -#include -#include -#include "zerogsmath.h" -#include "ZeroGSShaders/zerogsshaders.h" - extern u32 s_stencilfunc, s_stencilref, s_stencilmask; // Defines @@ -82,47 +76,10 @@ extern u32 s_stencilfunc, s_stencilref, s_stencilmask; // global alpha blending settings -extern GLenum g_internalFloatFmt; -extern 
GLenum g_internalRGBAFloatFmt; extern GLenum g_internalRGBAFloat16Fmt; -extern CGprogram g_vsprog, g_psprog; -extern CGparameter g_vparamPosXY[2], g_fparamFogColor; - -extern const char* ShaderCallerName; -extern const char* ShaderHandleName; - extern const GLenum primtype[8]; -inline void SetShaderCaller(const char* Name) -{ - ShaderCallerName = Name; -} - -inline void SetHandleName(const char* Name) -{ - ShaderHandleName = Name; -} - -extern void HandleCgError(CGcontext ctx, CGerror err, void* appdata); -extern void ZZcgSetParameter4fv(CGparameter param, const float* v, const char* name); - -#define SETVERTEXSHADER(prog) { \ - if( (prog) != g_vsprog ) { \ - cgGLBindProgram(prog); \ - g_vsprog = prog; \ - } \ -} \ - -#define SETPIXELSHADER(prog) { \ - if( (prog) != g_psprog ) { \ - cgGLBindProgram(prog); \ - g_psprog = prog; \ - } \ -} \ - - -#define SAFE_RELEASE_PROG(x) { if( (x) != NULL ) { cgDestroyProgram(x); x = NULL; } } #define SAFE_RELEASE_TEX(x) { if( (x) != 0 ) { glDeleteTextures(1, &(x)); x = 0; } } // inline for an extremely often used sequence @@ -138,7 +95,6 @@ inline void DisableAllgl() glColorMask(1, 1, 1, 1); } - //--------------------- Dummies #ifdef _WIN32 @@ -152,105 +108,8 @@ extern void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum); // ------------------------ Types ------------------------- -struct FRAGMENTSHADER -{ - FRAGMENTSHADER() : prog(0), sMemory(0), sFinal(0), sBitwiseANDX(0), sBitwiseANDY(0), sInterlace(0), sCLUT(0), sOneColor(0), sBitBltZ(0), - fTexAlpha2(0), fTexOffset(0), fTexDims(0), fTexBlock(0), fClampExts(0), fTexWrapMode(0), - fRealTexDims(0), fTestBlack(0), fPageOffset(0), fTexAlpha(0) {} - - CGprogram prog; - CGparameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT; - CGparameter sOneColor, sBitBltZ, sInvTexDims; - CGparameter fTexAlpha2, fTexOffset, fTexDims, fTexBlock, fClampExts, fTexWrapMode, fRealTexDims, fTestBlack, fPageOffset, fTexAlpha; - - void set_uniform_param(CGparameter 
&var, const char *name) - { - CGparameter p; - p = cgGetNamedParameter(prog, name); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p; - } - - bool set_texture(GLuint texobj, const char *name) - { - CGparameter p; - - p = cgGetNamedParameter(prog, name); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - { - cgGLSetTextureParameter(p, texobj); - cgGLEnableTextureParameter(p); - return true; - } - - return false; - } - - bool connect(CGparameter &tex, const char *name) - { - CGparameter p; - - p = cgGetNamedParameter(prog, name); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - { - cgConnectParameter(tex, p); - return true; - } - - return false; - } - - bool set_texture(CGparameter &tex, const char *name) - { - CGparameter p; - - p = cgGetNamedParameter(prog, name); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - { - //cgGLEnableTextureParameter(p); - tex = p; - return true; - } - - return false; - } - - bool set_shader_const(Vector v, const char *name) - { - CGparameter p; - - p = cgGetNamedParameter(prog, name); - - if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) - { - cgGLSetParameter4fv(p, v); - return true; - } - - return false; - } -}; - ///////////////////// // graphics resources -extern map mapGLExtensions; -//extern map mapShaderResources; - -struct VERTEXSHADER -{ - VERTEXSHADER() : prog(0), sBitBltPos(0), sBitBltTex(0) {} - - CGprogram prog; - CGparameter sBitBltPos, sBitBltTex, fBitBltTrans; // vertex shader constants -}; - -extern CGprofile cgvProf, cgfProf; -extern CGprogram pvs[16]; -extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS]; -extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2]; extern GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT // GL prototypes diff --git a/plugins/zzogl-pg/opengl/ZZKeyboard.cpp b/plugins/zzogl-pg/opengl/ZZKeyboard.cpp index b858e75954..4ca5514e37 100644 --- a/plugins/zzogl-pg/opengl/ZZKeyboard.cpp 
+++ b/plugins/zzogl-pg/opengl/ZZKeyboard.cpp @@ -21,7 +21,7 @@ #include "Util.h" #include "GS.h" -#include "ZeroGSShaders/zerogsshaders.h" +#include "ZZoglShaders.h" #include "Profile.h" #include "GLWin.h" @@ -34,7 +34,6 @@ extern u32 THR_KeyEvent; // value for passing out key events between threads extern bool THR_bShift, SaveStateExists; const char* s_aa[5] = { "AA none |", "AA 2x |", "AA 4x |", "AA 8x |", "AA 16x |" }; -const char* s_naa[3] = { "native res |", "res /2 |", "res /4 |" }; const char* pbilinear[] = { "off", "normal", "forced" }; @@ -96,7 +95,6 @@ void ProcessAASetting(bool reverse) void ProcessFPS() { FUNCLOG - extern bool g_bDisplayFPS; g_bDisplayFPS ^= 1; ZZLog::Debug_Log("Toggled FPS."); } @@ -112,31 +110,6 @@ void ProcessWireFrame() ZZLog::WriteToScreen(strtitle); } -void ProcessNegAASetting(bool reverse) -{ - FUNCLOG - - char strtitle[256]; - - if (reverse) - { - conf.negaa--; // -1 - if (conf.negaa > 2) conf.negaa = 2; // u8 in unsigned, so negative value is 255. - sprintf(strtitle, "down resolution - %s", s_naa[conf.negaa]); - ZeroGS::SetNegAA(conf.negaa); - } - else - { - conf.negaa++; - if (conf.negaa > 2) conf.negaa = 0; - sprintf(strtitle, "down resolution - %s", s_naa[conf.negaa]); - ZeroGS::SetNegAA(conf.negaa); - } - - ZZLog::WriteToScreen(strtitle); - SaveConfig(); -} - typedef struct GameHackStruct { const char HackName[40]; @@ -281,7 +254,7 @@ void WriteBilinear() } #ifdef _WIN32 -void ProcessMessages() +void ProcessEvents() { MSG msg; @@ -349,7 +322,7 @@ void ProcessMessages() #else // linux -void ProcessMessages() +void ProcessEvents() { FUNCLOG diff --git a/plugins/zzogl-pg/opengl/ZZLog.cpp b/plugins/zzogl-pg/opengl/ZZLog.cpp index 6ad727a9c5..82a7e1d69e 100644 --- a/plugins/zzogl-pg/opengl/ZZLog.cpp +++ b/plugins/zzogl-pg/opengl/ZZLog.cpp @@ -34,21 +34,16 @@ bool IsLogging() return (gsLog != NULL && conf.log); } -bool Open() +void Open() { - bool result = true; const std::string LogFile(s_strLogPath + "GSzzogl.log"); gsLog = 
fopen(LogFile.c_str(), "w"); if (gsLog != NULL) setvbuf(gsLog, NULL, _IONBF, 0); else - { SysMessage("Can't create log file %s\n", LogFile.c_str()); - result = false; - } - return result; } void Close() @@ -64,9 +59,11 @@ void SetDir(const char* dir) // Get the path to the log directory. s_strLogPath = (dir==NULL) ? "logs/" : dir; - // Reload the log file after updated the path - Close(); - Open(); + // Reload previously open log file + if (gsLog) { + Close(); + Open(); + } } void WriteToScreen(const char* pstr, u32 ms) @@ -167,9 +164,11 @@ void Greg_Log(const char *fmt, ...) va_start(list, fmt); - fprintf(gsLog, "GRegs: "); + if (IsLogging()) { + fprintf(gsLog, "GRegs: "); + vfprintf(gsLog, fmt, list); + } //fprintf(stderr,"GRegs: "); - if (IsLogging()) vfprintf(gsLog, fmt, list); //vfprintf(stderr, fmt, list); va_end(list); diff --git a/plugins/zzogl-pg/opengl/ZZLog.h b/plugins/zzogl-pg/opengl/ZZLog.h index 443e71435b..2a33ca269e 100644 --- a/plugins/zzogl-pg/opengl/ZZLog.h +++ b/plugins/zzogl-pg/opengl/ZZLog.h @@ -161,7 +161,6 @@ namespace ZeroGS { extern void AddMessage(const char* pstr, u32 ms); extern void SetAA(int mode); -extern void SetNegAA(int mode); extern bool Create(int width, int height); extern void Destroy(bool bD3D); extern void StartCapture(); @@ -172,7 +171,7 @@ namespace ZZLog { extern bool IsLogging(); void SetDir(const char* dir); -extern bool Open(); +extern void Open(); extern void Close(); extern void Message(const char *fmt, ...); extern void Log(const char *fmt, ...); diff --git a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp index aa3c966340..64a6b31374 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp @@ -23,6 +23,7 @@ //------------------ Includes #include "ZZoglCRTC.h" #include "GLWin.h" +#include "ZZoglShaders.h" using namespace ZeroGS; @@ -176,7 +177,6 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo) } } - // Something should be done 
before Renderering the picture. inline void RenderStartHelper(u32 bInterlace) { @@ -190,6 +190,7 @@ inline void RenderStartHelper(u32 bInterlace) // return; // } //#endif + if (conf.mrtdepth && pvs[8] == NULL) { conf.mrtdepth = 0; @@ -221,7 +222,7 @@ inline void RenderStartHelper(u32 bInterlace) glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); } - SETVERTEXSHADER(pvsBitBlt.prog); + ZZshSetVertexShader(pvsBitBlt.prog); glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); @@ -281,7 +282,7 @@ inline Vector RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS valpha.w = 1; } - ZZcgSetParameter4fv(prog->sOneColor, valpha, "g_fOneColor"); + ZZshSetParameter4fv(prog->sOneColor, valpha, "g_fOneColor"); return valpha; } @@ -294,8 +295,7 @@ inline void RenderCreateInterlaceTex(u32 bInterlace, int th, FRAGMENTSHADER* pro int interlacetex = CreateInterlaceTex(2 * th); - cgGLSetTextureParameter(prog->sInterlace, interlacetex); - cgGLEnableTextureParameter(prog->sInterlace); + ZZshGLSetTextureParameter(prog->sInterlace, interlacetex, "Interlace"); } // Well, do blending setup prior to second pass of half-frame drawing @@ -359,7 +359,7 @@ inline void RenderCRTC24helper(u32 bInterlace, int interlace, int psm) SetShaderCaller("RenderCRTC24helper"); // assume that data is already in ptexMem (do Resolve?) 
RenderGetForClip(bInterlace, interlace, psm, &ppsCRTC24[bInterlace]); - SETPIXELSHADER(ppsCRTC24[bInterlace].prog); + ZZshSetPixelShader(ppsCRTC24[bInterlace].prog); DrawTriangleArray(); } @@ -416,7 +416,7 @@ inline Vector RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace) v.w += 1.0f / (float)dh ; } - ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); return v; } @@ -440,7 +440,7 @@ inline Vector RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool v.w += 1.0f / conf.height; } - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); return v; } @@ -451,7 +451,7 @@ inline Vector RenderSetTargetBitTrans(int th) { SetShaderCaller("RenderSetTargetBitTrans"); Vector v = Vector(float(th), -float(th), float(th), float(th)); - ZZcgSetParameter4fv(pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans"); + ZZshSetParameter4fv(pvsBitBlt.fBitBltTrans, v, "g_fBitBltTrans"); return v; } @@ -469,7 +469,7 @@ inline Vector RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHAD v.y = 1.0f / (float)th; v.z = (float)0.0; v.w = -0.5f / (float)th; - ZZcgSetParameter4fv(prog->sInvTexDims, v, "g_fInvTexDims"); + ZZshSetParameter4fv(prog->sInvTexDims, v, "g_fInvTexDims"); } return v; @@ -554,11 +554,10 @@ inline void RenderCheckForTargets(tex0Info& texframe, list& list Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]); // inside vb[0]'s target area, so render that region only - cgGLSetTextureParameter(ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex); - cgGLEnableTextureParameter(ppsCRTCTarg[bInterlace].sFinal); + ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target"); RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTCTarg[bInterlace]); - SETPIXELSHADER(ppsCRTCTarg[bInterlace].prog); + ZZshSetPixelShader(ppsCRTCTarg[bInterlace].prog); 
DrawTriangleArray(); @@ -591,10 +590,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list& listT } // context has to be 0 - CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(texframe, 1); - - if ((pmemtarg == NULL) || (bInterlace >= 2)) - ZZLog::Error_Log("CRCR Check for memory shader fault."); + if (bInterlace >= 2) ZZLog::Error_Log("CRCR Check for memory shader fault."); //if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil); @@ -607,7 +603,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list& listT h1 = texframe.th; w2 = -0.5f; h2 = -0.5f; - SetTexVariablesInt(0, 2, texframe, pmemtarg, &ppsCRTC[bInterlace], 1); + SetTexVariablesInt(0, 2, texframe, false, &ppsCRTC[bInterlace], 1); } else { @@ -615,7 +611,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list& listT h1 = 1; w2 = -0.5f / (float)texframe.tw; h2 = -0.5f / (float)texframe.th; - SetTexVariablesInt(0, 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1); + SetTexVariablesInt(0, 0, texframe, false, &ppsCRTC[bInterlace], 1); } if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0); @@ -630,12 +626,9 @@ inline void RenderCheckForMemory(tex0Info& texframe, list& listT v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]); Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]); - cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex); - cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory); - + ZZshGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory"); RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]); - - SETPIXELSHADER(ppsCRTC[bInterlace].prog); + ZZshSetPixelShader(ppsCRTC[bInterlace].prog); DrawTriangleArray(); } diff --git a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp index 4c29a61741..14c903fdae 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp +++ 
b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp @@ -25,7 +25,7 @@ #include "zerogs.h" #include "GLWin.h" -#include "ZeroGSShaders/zerogsshaders.h" +#include "ZZoglShaders.h" #include "targets.h" // This include for windows resource file with Shaders #ifdef _WIN32 @@ -66,17 +66,6 @@ } #define GL_BLEND_SET() zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha) - -#define GL_STENCILFUNC(func, ref, mask) { \ - s_stencilfunc = func; \ - s_stencilref = ref; \ - s_stencilmask = mask; \ - glStencilFunc(func, ref, mask); \ -} - -#define GL_STENCILFUNC_SET() glStencilFunc(s_stencilfunc, s_stencilref, s_stencilmask) - -#define VB_BUFFERSIZE 0x400 #define VB_NUMBUFFERS 512 // ----------------- Types @@ -97,7 +86,6 @@ extern void KickDummy(); extern bool LoadEffects(); extern bool LoadExtraEffects(); extern FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); -VERTEXSHADER pvsBitBlt; GLuint vboRect = 0; vector g_vboBuffers; // VBOs for all drawing commands @@ -139,10 +127,9 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL; //------------------ variables //////////////////////////// // State parameters -float fiRendWidth, fiRendHeight; extern u8* s_lpShaderResources; -CGprogram pvs[16] = {NULL}; +ZZshProgram pvs[16] = {NULL}; // String's for shader file in developer mode #ifdef DEVBUILD @@ -167,10 +154,10 @@ int nLogoWidth, nLogoHeight; u32 s_ptexInterlace = 0; // holds interlace fields //------------------ Global Variables +int GPU_TEXWIDTH = 512; +float g_fiGPU_TEXWIDTH = 1/512.0f; int g_MaxTexWidth = 4096, g_MaxTexHeight = 4096; u32 s_uFramebuffer = 0; -CGprofile cgvProf, cgfProf; -int g_nPixelShaderVer = 0; // default RasterFont* font_p = NULL; float g_fBlockMult = 1; @@ -179,7 +166,6 @@ float g_fBlockMult = 1; u32 ptexBlocks = 0, ptexConv16to32 = 0; // holds information on block tiling u32 ptexBilinearBlocks = 0; u32 ptexConv32to16 = 0; 
-bool g_bDisplayMsg = 1; int g_nDepthBias = 0; //u32 g_bSaveFlushedFrame = 0; @@ -190,13 +176,10 @@ bool ZeroGS::IsGLExt(const char* szTargetExtension) return mapGLExtensions.find(string(szTargetExtension)) != mapGLExtensions.end(); } -inline bool -ZeroGS::Create_Window(int _width, int _height) +inline bool ZeroGS::Create_Window(int _width, int _height) { nBackbufferWidth = _width; nBackbufferHeight = _height; - fiRendWidth = 1.0f / nBackbufferWidth; - fiRendHeight = 1.0f / nBackbufferHeight; if (!GLWin.DisplayWindow(_width, _height)) return false; @@ -233,20 +216,9 @@ inline bool ZeroGS::CreateImportantCheck() ZZLog::Error_Log("*********\nZZogl: OGL WARNING: Need GL_EXT_secondary_color\nZZogl: *********"); bSuccess = false; } - - // load the effect & find the best profiles (if any) - if (cgGLIsProfileSupported(CG_PROFILE_ARBVP1) != CG_TRUE) - { - ZZLog::Error_Log("arbvp1 not supported."); - bSuccess = false; - } - - if (cgGLIsProfileSupported(CG_PROFILE_ARBFP1) != CG_TRUE) - { - ZZLog::Error_Log("arbfp1 not supported."); - bSuccess = false; - } - + + bSuccess &= ZZshCheckProfilesSupport(); + return bSuccess; } @@ -454,9 +426,6 @@ inline bool CreateFillExtensionsMap() return true; } - -const static char* g_pShaders[] = { "full", "reduced", "accurate", "accurate-reduced" }; - void LoadglFunctions() { GL_LOADFN(glIsRenderbufferEXT); @@ -478,6 +447,20 @@ void LoadglFunctions() GL_LOADFN(glGenerateMipmapEXT); } +inline bool TryBlockFormat(GLint fmt, const GLvoid* vBlockData) { + g_internalFloatFmt = fmt; + glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_FLOAT, vBlockData); + return (glGetError() == GL_NO_ERROR); +} + +inline bool TryBlinearFormat(GLint fmt32, GLint fmt16, const GLvoid* vBilinearData) { + g_internalRGBAFloatFmt = fmt32; + g_internalRGBAFloat16Fmt = fmt16; + glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, vBilinearData); + return 
(glGetError() == GL_NO_ERROR); +} + + bool ZeroGS::Create(int _width, int _height) { GLenum err = GL_NO_ERROR; @@ -487,7 +470,6 @@ bool ZeroGS::Create(int _width, int _height) Destroy(1); GSStateReset(); - cgSetErrorHandler(HandleCgError, NULL); g_RenderFormatType = RFT_float16; if (!Create_Window(_width, _height)) return false; @@ -501,10 +483,10 @@ bool ZeroGS::Create(int _width, int _height) // Limit the texture size supported to 8192. We do not need bigger texture. // Besides the following assertion is false when texture are too big. // ZZoglFlush.cpp:2349: assert(fblockstride >= 1.0f) - g_MaxTexWidth = min(8192, g_MaxTexWidth); + //g_MaxTexWidth = min(8192, g_MaxTexWidth); g_MaxTexHeight = g_MaxTexWidth / 4; - GPU_TEXWIDTH = g_MaxTexWidth / 8; + GPU_TEXWIDTH = min (g_MaxTexWidth/8, 1024); g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH; if (!CreateOpenShadersFile()) return false; @@ -628,43 +610,32 @@ bool ZeroGS::Create(int _width, int _height) glGenTextures(1, &ptexBlocks); glBindTexture(GL_TEXTURE_2D, ptexBlocks); - - g_internalFloatFmt = GL_RGBA32F; // This is OpenGL 3.0 standard format, so it should be implemented in new cards. - g_internalRGBAFloatFmt = GL_RGBA32F; - g_internalRGBAFloat16Fmt = GL_RGBA16F; - - glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_FLOAT, &vBlockData[0]); - - if (glGetError() != GL_NO_ERROR) - { - // try different internal format - g_internalFloatFmt = GL_ALPHA_FLOAT32_ATI; - glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_FLOAT, &vBlockData[0]); + + if (TryBlockFormat(GL_RGBA32F, &vBlockData[0])) + ZZLog::Error_Log("Use GL_RGBA32F for blockdata."); + else if (TryBlockFormat(GL_ALPHA_FLOAT32_ATI, &vBlockData[0])) + ZZLog::Error_Log("Use ATI_texture_float for blockdata."); + else if (TryBlockFormat(GL_ALPHA32F_ARB, &vBlockData[0])) + ZZLog::Error_Log("Use ARB_texture_float for blockdata."); + else + { // This case is bad. 
But for really old cards it could be nice. + g_fBlockMult = 65535.0f*(float)g_fiGPU_TEXWIDTH; + BLOCK::FillBlocks(vBlockData, vBilinearData, 0); + g_internalFloatFmt = GL_ALPHA16; - if (glGetError() != GL_NO_ERROR) - { - // This case is bad. But for really old cards it could be nice. - - g_fBlockMult = 65535.0f*(float)g_fiGPU_TEXWIDTH ; - BLOCK::FillBlocks(vBlockData, vBilinearData, 0); - g_internalFloatFmt = GL_ALPHA16 ; - // We store block data on u16 rather float numbers. It's not as precise, but ALPHA16 is OpenGL 2.0 standard - // and uses only 16 bit. Old zerogs use red channel, but it does not work. - - glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_UNSIGNED_SHORT, &vBlockData[0]); - if (glGetError() != GL_NO_ERROR) - { - ZZLog::Error_Log("Could not fill blocks."); - return false; - } - do_not_use_billinear = true; - ZZLog::Debug_Log("Using non-bilinear fill, quallity is outdated!"); - } - else - ZZLog::Debug_Log("Use ATI_texture_float for blockdata."); + // We store block data on u16 rather float numbers. It's not so preciese, but ALPHA16 is OpenGL 2.0 standart + // and use only 16 bit. Old zerogs use red channel, but it does not work. + + glTexImage2D(GL_TEXTURE_2D, 0, g_internalFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_ALPHA, GL_UNSIGNED_SHORT, &vBlockData[0]); + if( glGetError() != GL_NO_ERROR ) { + ZZLog::Error_Log("ZZogl ERROR: could not fill blocks"); + return false; + } + + do_not_use_billinear = true; + conf.bilinear = 0; + ZZLog::Error_Log("Using non-bilinear fill, quallity is outdated!"); } - else - ZZLog::Debug_Log("Use GL_RGBA32F for blockdata."); setTex2DFilters(GL_NEAREST); setTex2DWrap(GL_REPEAT); @@ -674,33 +645,15 @@ bool ZeroGS::Create(int _width, int _height) // fill in the bilinear blocks (main variant). 
glGenTextures(1, &ptexBilinearBlocks); glBindTexture(GL_TEXTURE_2D, ptexBilinearBlocks); - glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, &vBilinearData[0]); - - if (glGetError() != GL_NO_ERROR) - { - g_internalRGBAFloatFmt = GL_RGBA_FLOAT32_ATI; - g_internalRGBAFloat16Fmt = GL_RGBA_FLOAT16_ATI; - glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, &vBilinearData[0]); - - if (glGetError() != GL_NO_ERROR) - { - g_internalRGBAFloatFmt = GL_FLOAT_RGBA32_NV; - g_internalRGBAFloat16Fmt = GL_FLOAT_RGBA16_NV; - glTexImage2D(GL_TEXTURE_2D, 0, g_internalRGBAFloatFmt, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, GL_RGBA, GL_FLOAT, &vBilinearData[0]); - - if (glGetError() != GL_NO_ERROR) - { - ZZLog::Error_Log("Fill bilinear blocks failed!"); - return false; - } - else - ZZLog::Debug_Log("Fill bilinear blocks with NVidia_float."); - } - else - ZZLog::Debug_Log("Fill bilinear blocks with ATI_texture_float."); - } - else - ZZLog::Debug_Log("Fill bilinear blocks OK.!"); + + if (TryBlinearFormat(GL_RGBA32F, GL_RGBA16F, &vBilinearData[0])) + ZZLog::Error_Log("Fill bilinear blocks OK.!"); + else if (TryBlinearFormat(GL_RGBA_FLOAT32_ATI, GL_RGBA_FLOAT16_ATI, &vBilinearData[0])) + ZZLog::Error_Log("Fill bilinear blocks with ATI_texture_float."); + else if (TryBlinearFormat(GL_FLOAT_RGBA32_NV, GL_FLOAT_RGBA16_NV, &vBilinearData[0])) + ZZLog::Error_Log("ZZogl Fill bilinear blocks with NVidia_float."); + else + ZZLog::Error_Log("Fill bilinear blocks failed."); setTex2DFilters(GL_NEAREST); setTex2DWrap(GL_REPEAT); @@ -814,72 +767,7 @@ bool ZeroGS::Create(int _width, int _height) if (err != GL_NO_ERROR) bSuccess = false; - g_cgcontext = cgCreateContext(); - - cgvProf = CG_PROFILE_ARBVP1; - cgfProf = CG_PROFILE_ARBFP1; - - cgGLEnableProfile(cgvProf); - cgGLEnableProfile(cgfProf); - - cgGLSetOptimalOptions(cgvProf); - cgGLSetOptimalOptions(cgfProf); - - 
cgGLSetManageTextureParameters(g_cgcontext, CG_FALSE); - - //cgSetAutoCompile(g_cgcontext, CG_COMPILE_IMMEDIATE); - - g_fparamFogColor = cgCreateParameter(g_cgcontext, CG_FLOAT4); - g_vparamPosXY[0] = cgCreateParameter(g_cgcontext, CG_FLOAT4); - g_vparamPosXY[1] = cgCreateParameter(g_cgcontext, CG_FLOAT4); - - ZZLog::GS_Log("Creating effects."); - - B_G(LoadEffects(), return false); - - g_bDisplayMsg = 0; - - - // create a sample shader - clampInfo temp; - - memset(&temp, 0, sizeof(temp)); - - temp.wms = 3; - temp.wmt = 3; - - g_nPixelShaderVer = 0;//SHADER_ACCURATE; - - // test - bool bFailed; - - FRAGMENTSHADER* pfrag = LoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed); - - if (bFailed || pfrag == NULL) - { - g_nPixelShaderVer = SHADER_ACCURATE | SHADER_REDUCED; - - pfrag = LoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed); - - if (pfrag != NULL) - cgGLLoadProgram(pfrag->prog); - - if (bFailed || pfrag == NULL || cgGetError() != CG_NO_ERROR) - { - g_nPixelShaderVer = SHADER_REDUCED; - ZZLog::Error_Log("Basic shader test failed."); - } - } - - g_bDisplayMsg = 1; - - if (g_nPixelShaderVer & SHADER_REDUCED) conf.bilinear = 0; - - ZZLog::GS_Log("Creating extra effects."); - - B_G(LoadExtraEffects(), return false); - - ZZLog::GS_Log("Using %s shaders.", g_pShaders[g_nPixelShaderVer]); + if (!ZZshStartUsingShaders()) bSuccess = false; GL_REPORT_ERROR(); diff --git a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp index 0595754dc4..b9aee62bb1 100644 --- a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp @@ -26,6 +26,7 @@ #include "zerogs.h" #include "targets.h" #include "ZZoglFlushHack.h" +#include "ZZoglShaders.h" using namespace ZeroGS; @@ -119,14 +120,11 @@ void Draw(const VB& curvb) //------------------ variables extern int g_nDepthBias; -extern float g_fBlockMult; +extern float g_fBlockMult; // used for old cards, that do not support Alpha-32float textures. We store block data in u16 and use it. 
bool g_bUpdateStencil = 1; //u32 g_SaveFrameNum = 0; // ZZ -int GPU_TEXWIDTH = 512; -float g_fiGPU_TEXWIDTH = 1 / 512.0f; - -extern CGprogram g_psprog; // 2 -- ZZ +extern ZZshProgram g_psprog; // 2 -- ZZ // local alpha blending settings static GLenum s_rgbeq, s_alphaeq; // set by zgsBlendEquationSeparateEXT // ZZ @@ -201,8 +199,8 @@ namespace ZeroGS VB vb[2]; float fiTexWidth[2], fiTexHeight[2]; // current tex width and height -u8 s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set -u8 s_AAz = 0, s_AAw = 0; // if AAy is set, then AAx has to be set +//u8 s_AAx = 0, s_AAy = 0; // if AAy is set, then AAx has to be set +Point AA = {0,0}; int icurctx = -1; @@ -219,11 +217,11 @@ void ResetAlphaVariables(); inline void SetAlphaTestInt(pixTest curtest); -inline void RenderAlphaTest(const VB& curvb, CGparameter sOneColor); +inline void RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor); inline void RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting); inline void ProcessStencil(const VB& curvb); -inline void RenderFBA(const VB& curvb, CGparameter sOneColor); -inline void ProcessFBA(const VB& curvb, CGparameter sOneColor); // zz +inline void RenderFBA(const VB& curvb, ZZshParameter sOneColor); +inline void ProcessFBA(const VB& curvb, ZZshParameter sOneColor); // zz } @@ -249,6 +247,14 @@ inline void SetAlphaTest(const pixTest& curtest) glAlphaFunc(g_dwAlphaCmp[curtest.atst], AlphaReferedValue(curtest.aref)); } } + +// Return, if tcc, aem or psm mode told us, than Alpha test should be used +// if tcc == 0 than no alpha used, aem used for alpha expanding and I am not sure +// that it's correct, psm -- color mode, +inline bool IsAlphaTestExpansion(tex0Info tex0) +{ + return (tex0.tcc && gs.texa.aem && PSMT_ALPHAEXP(PIXEL_STORAGE_FORMAT(tex0))); +} // Switch wireframe rendering off for first flush, so it's draw few solid primitives inline void SwitchWireframeOff() @@ -853,7 +859,7 @@ inline Vector FlushSetPageOffset(FRAGMENTSHADER* pfragment, int 
shadertype, CRen // zoe2 if (PSMT_ISZTEX(ptextarg->psm)) vpageoffset.w = -1.0f; - ZZcgSetParameter4fv(pfragment->fPageOffset, vpageoffset, "g_fPageOffset"); + ZZshSetParameter4fv(pfragment->fPageOffset, vpageoffset, "g_fPageOffset"); return vpageoffset; } @@ -871,7 +877,7 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c v.y = 16.0f / (float)curvb.tex0.th; v.z = 0.5f * v.x; v.w = 0.5f * v.y; - ZZcgSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); + ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); } else if (shadertype == 4) { @@ -880,7 +886,7 @@ inline Vector FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c v.y = 16.0f / (float)ptextarg->fbh; v.z = -1; v.w = 8.0f / (float)ptextarg->fbh; - ZZcgSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); + ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); } return v; @@ -914,7 +920,7 @@ inline Vector FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& cu if (shadertype == 4) vTexDims.z += 8.0f; - ZZcgSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims"); + ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims"); return vTexDims; } @@ -951,11 +957,11 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext GLuint ptexclut = 0; - //int psm = GetTexCPSM(curvb.tex0); + //int psm = PIXEL_STORAGE_FORMAT(curvb.tex0); int shadertype = FlushGetShaderType(curvb, ptextarg, ptexclut); FRAGMENTSHADER* pfragment = LoadShadeEffect(shadertype, 0, curvb.curprim.fge, - IsAlphaTestExpansion(curvb), exactcolor, curvb.clamp, context, NULL); + IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL); Vector vpageoffset = FlushSetPageOffset(pfragment, shadertype, ptextarg); @@ -964,10 +970,7 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext Vector vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg); if (pfragment->sCLUT != NULL && 
ptexclut != 0) - { - cgGLSetTextureParameter(pfragment->sCLUT, ptexclut); - cgGLEnableTextureParameter(pfragment->sCLUT); - } + ZZshGLSetTextureParameter(pfragment->sCLUT, ptexclut, "CLUT"); FlushApplyResizeFilter(curvb, dwFilterOpts, ptextarg, context); @@ -997,7 +1000,7 @@ inline FRAGMENTSHADER* FlushMadeNewTarget(VB& curvb, int exactcolor, int context } FRAGMENTSHADER* pfragment = LoadShadeEffect(0, GetTexFilter(curvb.tex1), curvb.curprim.fge, - IsAlphaTestExpansion(curvb), exactcolor, curvb.clamp, context, NULL); + IsAlphaTestExpansion(curvb.tex0), exactcolor, curvb.clamp, context, NULL); if (pfragment == NULL) ZZLog::Error_Log("Could not find memory target shader."); @@ -1011,35 +1014,25 @@ inline void FlushSetTexture(VB& curvb, FRAGMENTSHADER* pfragment, CRenderTarget* SetTexVariables(context, pfragment); SetTexInt(context, pfragment, ptextarg == NULL); - // have to enable the texture parameters(curtest.atst= + // have to enable the texture parameters(curtest.atst) + if( curvb.ptexClamp[0] != 0 ) + ZZshGLSetTextureParameter(pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0"); + + if( curvb.ptexClamp[1] != 0 ) + ZZshGLSetTextureParameter(pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1"); + + if( pfragment->sMemory != NULL && s_ptexCurSet[context] != 0) + ZZshGLSetTextureParameter(pfragment->sMemory, s_ptexCurSet[context], "Clamp memory"); - if (curvb.ptexClamp[0] != 0) - { - cgGLSetTextureParameter(pfragment->sBitwiseANDX, curvb.ptexClamp[0]); - cgGLEnableTextureParameter(pfragment->sBitwiseANDX); - } - - if (curvb.ptexClamp[1] != 0) - { - cgGLSetTextureParameter(pfragment->sBitwiseANDY, curvb.ptexClamp[1]); - cgGLEnableTextureParameter(pfragment->sBitwiseANDY); - } - - if (pfragment->sMemory != NULL && s_ptexCurSet[context] != 0) - { - cgGLSetTextureParameter(pfragment->sMemory, s_ptexCurSet[context]); - cgGLEnableTextureParameter(pfragment->sMemory); - } } -// Reset programm and texture variables; +// Reset program and texture variables; inline 
void FlushBindProgramm(FRAGMENTSHADER* pfragment, int context) { vb[context].bTexConstsSync = 0; vb[context].bVarsTexSync = 0; - cgGLBindProgram(pfragment->prog); - g_psprog = pfragment->prog; + ZZshSetPixelShader(pfragment->prog); } inline FRAGMENTSHADER* FlushRendererStage(VB& curvb, u32& dwFilterOpts, CRenderTarget* ptextarg, int exactcolor, int context) @@ -1072,8 +1065,8 @@ inline FRAGMENTSHADER* FlushRendererStage(VB& curvb, u32& dwFilterOpts, CRenderT GL_REPORT_ERRORD(); // set the shaders - SetShaderCaller("FlushRendererStage") ; - SETVERTEXSHADER(pvs[2 * ((curvb.curprim._val >> 1) & 3) + 8 * s_bWriteDepth + context]); + SetShaderCaller("FlushRendererStage"); + ZZshSetVertexShader(pvs[2 * ((curvb.curprim._val >> 1) & 3) + 8 * s_bWriteDepth + context]); FlushBindProgramm(pfragment, context); GL_REPORT_ERRORD(); @@ -1116,9 +1109,6 @@ inline void AlphaSetDepthTest(VB& curvb, const pixTest curtest, FRAGMENTSHADER* GL_ZTEST(curtest.zte); -// glEnable (GL_POLYGON_OFFSET_FILL); -// glPolygonOffset (-1., -1.); - if (s_bWriteDepth) { if (!curvb.zbuf.zmsk) @@ -1180,13 +1170,13 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf v.w *= 255; } - ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); } else { // not using blending so set to defaults Vector v = exactcolor ? 
Vector(1, 510 * 255.0f / 256.0f, 0, 0) : Vector(1, 2 * 255.0f / 256.0f, 0, 0); - ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); } @@ -1277,7 +1267,7 @@ inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor) if (exactcolor) v.y *= 255; - ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); Draw(curvb); @@ -1346,7 +1336,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE if (exactcolor) { v.y *= 255; v.w *= 255; } - ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); glEnable(GL_BLEND); GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA); @@ -1370,7 +1360,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE if (exactcolor) v.y *= 255; - ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); Draw(curvb); @@ -1422,7 +1412,7 @@ inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsin glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); Vector v = Vector(0, exactcolor ? 
510.0f : 2.0f, 0, 0); - ZZcgSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor"); Draw(curvb); // don't need to restore @@ -1469,12 +1459,11 @@ inline void AlphaSaveTarget(VB& curvb) //#endif // char str[255]; // sprintf(str, "frames/frame%.4d.tga", g_SaveFrameNum++); -// if( (g_bSaveFlushedFrame & 2) ) { -// //glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 ); // switch to the backbuffer -// //glFlush(); -// //SaveTexture("tex.jpg", GL_TEXTURE_RECTANGLE_NV, curvb.prndr->ptex, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh)); -// SaveRenderTarget(str, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh), 0); -// } + +// //glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, 0 ); // switch to the backbuffer +// //glFlush(); +// //SaveTexture("tex.jpg", GL_TEXTURE_RECTANGLE_NV, curvb.prndr->ptex, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh)); +// SaveRenderTarget(str, RW(curvb.prndr->fbw), RH(curvb.prndr->fbh), 0); // } #endif } @@ -1500,7 +1489,7 @@ inline void AlphaColorClamping(VB& curvb, const pixTest curtest) SetShaderCaller("AlphaColorClamping"); - SETPIXELSHADER(ppsOne.prog); + ZZshSetPixelShader(ppsOne.prog); GL_BLEND_RGB(GL_ONE, GL_ONE); float f; @@ -1508,7 +1497,7 @@ inline void AlphaColorClamping(VB& curvb, const pixTest curtest) if (bAlphaClamping & 1) // min { f = 0; - ZZcgSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor"); + ZZshSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor"); GL_BLENDEQ_RGB(GL_MAX_EXT); Draw(curvb); } @@ -1517,7 +1506,7 @@ inline void AlphaColorClamping(VB& curvb, const pixTest curtest) if (bAlphaClamping & 2) // max { f = 1; - ZZcgSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor"); + ZZshSetParameter4fv(ppsOne.sOneColor, &f, "g_fOneColor"); GL_BLENDEQ_RGB(GL_MIN_EXT); Draw(curvb); } @@ -1615,7 +1604,7 @@ void ZeroGS::FlushBoth() Flush(1); } -inline void ZeroGS::RenderFBA(const VB& curvb, CGparameter sOneColor) +inline void ZeroGS::RenderFBA(const VB& curvb, ZZshParameter sOneColor) { // add 
fba to all pixels GL_STENCILFUNC(GL_ALWAYS, STENCIL_FBA, 0xff); @@ -1636,7 +1625,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, CGparameter sOneColor) Vector v(1,2,0,0); - ZZcgSetParameter4fv(sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(sOneColor, v, "g_fOneColor"); Draw(curvb); @@ -1659,7 +1648,7 @@ inline void ZeroGS::RenderFBA(const VB& curvb, CGparameter sOneColor) GL_ZTEST(curvb.test.zte); } -__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, CGparameter sOneColor) +__forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor) { if (!g_bUpdateStencil) return; @@ -1675,7 +1664,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, CGparameter sOneColo Vector v(1,2,0,0); - ZZcgSetParameter4fv(sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(sOneColor, v, "g_fOneColor"); // or a 1 to the stencil buffer wherever alpha passes glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); @@ -1699,7 +1688,7 @@ __forceinline void ZeroGS::RenderAlphaTest(const VB& curvb, CGparameter sOneColo if (curvb.test.ate && curvb.test.atst > 1 && curvb.test.aref > 0x80) { v = Vector(1,1,0,0); - ZZcgSetParameter4fv(sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(sOneColor, v, "g_fOneColor"); glAlphaFunc(g_dwAlphaCmp[curvb.test.atst], AlphaReferedValue(curvb.test.aref)); } @@ -1763,7 +1752,7 @@ inline void ZeroGS::ProcessStencil(const VB& curvb) SetShaderCaller("ProcessStencil"); - SETPIXELSHADER(ppsOne.prog); + ZZshSetPixelShader(ppsOne.prog); Draw(curvb); // process when alpha >= 0xff @@ -1797,7 +1786,7 @@ inline void ZeroGS::ProcessStencil(const VB& curvb) glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); } -__forceinline void ZeroGS::ProcessFBA(const VB& curvb, CGparameter sOneColor) +__forceinline void ZeroGS::ProcessFBA(const VB& curvb, ZZshParameter sOneColor) { if ((curvb.frame.fbm&0x80000000)) return; @@ -1823,8 +1812,8 @@ __forceinline void ZeroGS::ProcessFBA(const VB& curvb, CGparameter sOneColor) GL_BLENDEQ_ALPHA(GL_FUNC_ADD); float f = 
1; - ZZcgSetParameter4fv(sOneColor, &f, "g_fOneColor"); - SETPIXELSHADER(ppsOne.prog); + ZZshSetParameter4fv(sOneColor, &f, "g_fOneColor"); + ZZshSetPixelShader(ppsOne.prog); Draw(curvb); glDisable(GL_ALPHA_TEST); @@ -1980,13 +1969,13 @@ void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint) { if (vb[context].pmemtarg != pmemtarg) { - SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, pmemtarg, pfragment, s_bForceTexFlush); + SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, true, pfragment, s_bForceTexFlush); vb[context].bVarsTexSync = true; } } else { - SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, pmemtarg, pfragment, s_bForceTexFlush); + SetTexVariablesInt(context, GetTexFilter(vb[context].tex1), tex0, false, pfragment, s_bForceTexFlush); vb[context].bVarsTexSync = true; INC_TEXVARS(); @@ -2081,10 +2070,10 @@ void ZeroGS::SetTexClamping(int context, FRAGMENTSHADER* pfragment) } if (pfragment->fTexWrapMode != 0) - ZZcgSetParameter4fv(pfragment->fTexWrapMode, v, "g_fTexWrapMode"); + ZZshSetParameter4fv(pfragment->fTexWrapMode, v, "g_fTexWrapMode"); if (pfragment->fClampExts != 0) - ZZcgSetParameter4fv(pfragment->fClampExts, v2, "g_fClampExts"); + ZZshSetParameter4fv(pfragment->fClampExts, v2, "g_fClampExts"); } @@ -2122,9 +2111,9 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) Vector valpha, valpha2 ; // if clut, use the frame format - int psm = GetTexCPSM(tex0); + int psm = PIXEL_STORAGE_FORMAT(tex0); -// printf ( "A %d psm, is-clut %d. cpsm %d | %d %d\n", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc ); +// ZZLog::Error_Log( "A %d psm, is-clut %d. 
cpsm %d | %d %d", psm, PSMT_ISCLUT(psm), tex0.cpsm, tex0.tfx, tex0.tcc ); Vector vblack; vblack.x = vblack.y = vblack.z = vblack.w = 10; @@ -2149,7 +2138,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) valpha2.z = (tex0.tfx != 1) * 2 ; valpha2.w = (tex0.tfx == 0) ; - if (tex0.tcc == 0 || !nNeedAlpha(psm)) + if (tex0.tcc == 0 || !PSMT_ALPHAEXP(psm)) { valpha.x = 0 ; valpha.y = (!!tex0.tcc) * (1 + (tex0.tfx == 0)) ; @@ -2157,7 +2146,8 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) else { valpha.x = (gs.texa.fta[0]) * (1 + (tex0.tfx == 0)) ; - valpha.y = (gs.texa.fta[psm!=1] - gs.texa.fta[0]) * (1 + (tex0.tfx == 0)) ; + valpha.y = (gs.texa.fta[psm != PSMCT24] - gs.texa.fta[0]) * (1 + (tex0.tfx == 0)) ; + } valpha.z = (tex0.tfx >= 3) ; @@ -2206,7 +2196,7 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) valpha4.z = 0; valpha4.w = 0; } - if( nNeedAlpha(psm) ) { + if( PSMT_ALPHAEXP(psm) ) { if( tex0.tfx == 0 ) { // make sure alpha is mult by two when the output is Cv = Ct*Cf @@ -2241,26 +2231,26 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) } if ( equal_vectors(valpha, valpha3) && equal_vectors(valpha2, valpha4) ) { - if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm)] == 0) { - printf ( "Good issue %d %d %d %d\n", tex0.tfx, tex0.tcc, psm, nNeedAlpha(psm) ); - CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm) ] = 1; + if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] == 0) { + printf ( "Good issue %d %d %d %d\n", tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm) ); + CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm) ] = 1; } } - else if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm)] == -1) { + else if (CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] == -1) { printf ("Bad array, %d %d %d %d\n\tolf valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n\tnew valpha %f, %f, %f, %f : valpha2 %f %f %f %f\n", - tex0.tfx, 
tex0.tcc, psm, nNeedAlpha(psm), + tex0.tfx, tex0.tcc, psm, PSMT_ALPHAEXP(psm), valpha3.x, valpha3.y, valpha3.z, valpha3.w, valpha4.x, valpha4.y, valpha4.z, valpha4.w, valpha.x, valpha.y, valpha.z, valpha.w, valpha2.x, valpha2.y, valpha2.z, valpha2.w); - CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][nNeedAlpha(psm)] = -1 ; + CheckTexArray[tex0.tfx][tex0.tcc][psm!=1][PSMT_ALPHAEXP(psm)] = -1 ; } // Test;*/ - ZZcgSetParameter4fv(pfragment->fTexAlpha, valpha, "g_fTexAlpha"); - ZZcgSetParameter4fv(pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2"); + ZZshSetParameter4fv(pfragment->fTexAlpha, valpha, "g_fTexAlpha"); + ZZshSetParameter4fv(pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2"); - if (tex0.tcc && gs.texa.aem && nNeedAlpha(psm)) - ZZcgSetParameter4fv(pfragment->fTestBlack, vblack, "g_fTestBlack"); + if (IsAlphaTestExpansion(tex0)) + ZZshSetParameter4fv(pfragment->fTestBlack, vblack, "g_fTestBlack"); SetTexClamping(context, pfragment); @@ -2276,17 +2266,20 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment) } } -void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, CMemoryTarget* pmemtarg, FRAGMENTSHADER* pfragment, int force) +void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force) { FUNCLOG Vector v; - assert(pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL); + CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(tex0, 1); + assert( pmemtarg != NULL && pfragment != NULL && pmemtarg->ptex != NULL); if (pmemtarg == NULL || pfragment == NULL || pmemtarg->ptex == NULL) { - printf("SetTexVariablesInt error\n"); + ZZLog::Error_Log("SetTexVariablesInt error."); return; } + + if (CheckVB && vb[context].pmemtarg == pmemtarg) return; SetShaderCaller("SetTexVariablesInt"); @@ -2303,9 +2296,9 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, v.w = 1.0f / (float)fh; if (pfragment->fRealTexDims) - 
ZZcgSetParameter4fv(pfragment->fRealTexDims, v, "g_fRealTexDims"); + ZZshSetParameter4fv(pfragment->fRealTexDims, v, "g_fRealTexDims"); else - ZZcgSetParameter4fv(cgGetNamedParameter(pfragment->prog, "g_fRealTexDims"), v, "g_fRealTexDims"); + ZZshSetParameter4fv(cgGetNamedParameter(pfragment->prog,"g_fRealTexDims"),v, "g_fRealTexDims"); } if (m_Blocks[tex0.psm].bpp == 0) @@ -2359,11 +2352,11 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, v.z *= b.bpp * (1 / 32.0f); } - ZZcgSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims"); + ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims"); -// ZZcgSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from Vector to float[4] is ok. - ZZcgSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock"); - ZZcgSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); +// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from Vector to float[4] is ok. 
+ ZZshSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock"); + ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset"); // get hardware texture dims //int texheight = (pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult; @@ -2383,7 +2376,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, v.w = 0.5f;*/ v.w = 0.5f; - ZZcgSetParameter4fv(pfragment->fPageOffset, v, "g_fPageOffset"); + ZZshSetParameter4fv(pfragment->fPageOffset, v, "g_fPageOffset"); if (force) s_ptexCurSet[context] = pmemtarg->ptex->tex; diff --git a/plugins/zzogl-pg/opengl/ZZoglShaders.cpp b/plugins/zzogl-pg/opengl/ZZoglShaders.cpp index 6f9ddf5a63..658d763de5 100644 --- a/plugins/zzogl-pg/opengl/ZZoglShaders.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglShaders.cpp @@ -21,11 +21,44 @@ //------------------- Includes #include "zerogs.h" -#include "ZeroGSShaders/zerogsshaders.h" +#include "ZZoglShaders.h" #include "zpipe.h" // ----------------- Defines +#define TEXWRAP_REPEAT 0 +#define TEXWRAP_CLAMP 1 +#define TEXWRAP_REGION_REPEAT 2 +#define TEXWRAP_REPEAT_CLAMP 3 + +#define SH_WRITEDEPTH 0x2000 // depth is written +#define SH_CONTEXT1 0x1000 // context1 is used + +#define SH_REGULARVS 0x8000 +#define SH_TEXTUREVS 0x8001 +#define SH_REGULARFOGVS 0x8002 +#define SH_TEXTUREFOGVS 0x8003 +#define SH_REGULARPS 0x8004 +#define SH_REGULARFOGPS 0x8005 +#define SH_BITBLTVS 0x8006 +#define SH_BITBLTPS 0x8007 +#define SH_BITBLTDEPTHPS 0x8009 +#define SH_CRTCTARGPS 0x800a +#define SH_CRTCPS 0x800b +#define SH_CRTC24PS 0x800c +#define SH_ZEROPS 0x800e +#define SH_BASETEXTUREPS 0x800f +#define SH_BITBLTAAPS 0x8010 +#define SH_CRTCTARGINTERPS 0x8012 +#define SH_CRTCINTERPS 0x8013 +#define SH_CRTC24INTERPS 0x8014 +#define SH_BITBLTDEPTHMRTPS 0x8016 +#define SH_CONVERT16TO32PS 0x8020 +#define SH_CONVERT32TO16PS 0x8021 +#define SH_CRTC_NEARESTPS 0x8022 +#define SH_CRTCINTER_NEARESTPS 0x8023 + + using namespace ZeroGS; //------------------ Constants @@ -35,24 
+68,40 @@ namespace ZeroGS { FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; +VERTEXSHADER pvsBitBlt; } // Debug variable, store name of the function that call the shader. const char* ShaderCallerName = ""; const char* ShaderHandleName = ""; -extern u32 ptexBlocks; // holds information on block tiling -extern u32 ptexConv16to32; +extern u32 ptexBlocks; // holds information on block tiling. Its texture number in OpenGL -- if 0 than such texture +extern u32 ptexConv16to32; // does not exist. This textures should be created on start and released on finish. extern u32 ptexConv32to16; bool g_bCRTCBilinear = true; u8* s_lpShaderResources = NULL; map mapShaderResources; -CGcontext g_cgcontext; +ZZshContext g_cgcontext; +ZZshProfile cgvProf, cgfProf; +int g_nPixelShaderVer = 0; // default //------------------ Code +bool ZZshCheckProfilesSupport() { + // load the effect, find the best profiles (if any) + if (cgGLIsProfileSupported(CG_PROFILE_ARBVP1) != CG_TRUE) { + ZZLog::Error_Log("arbvp1 not supported."); + return false; + } + if (cgGLIsProfileSupported(CG_PROFILE_ARBFP1) != CG_TRUE) { + ZZLog::Error_Log("arbfp1 not supported."); + return false; + } + return true; +} + // Error handler. Setup in ZZogl_Create once. 
-void HandleCgError(CGcontext ctx, CGerror err, void* appdata) +void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata) { ZZLog::Error_Log("%s->%s: %s", ShaderCallerName, ShaderHandleName, cgGetErrorString(err)); const char* listing = cgGetLastListing(g_cgcontext); @@ -60,14 +109,106 @@ void HandleCgError(CGcontext ctx, CGerror err, void* appdata) if (listing != NULL) ZZLog::Debug_Log(" Last listing: %s", listing); } +bool ZZshStartUsingShaders() { + cgSetErrorHandler(HandleCgError, NULL); + g_cgcontext = cgCreateContext(); + + cgvProf = CG_PROFILE_ARBVP1; + cgfProf = CG_PROFILE_ARBFP1; + cgGLEnableProfile(cgvProf); + cgGLEnableProfile(cgfProf); + cgGLSetOptimalOptions(cgvProf); + cgGLSetOptimalOptions(cgfProf); + + cgGLSetManageTextureParameters(g_cgcontext, CG_FALSE); + //cgSetAutoCompile(g_cgcontext, CG_COMPILE_IMMEDIATE); + + g_fparamFogColor = cgCreateParameter(g_cgcontext, CG_FLOAT4); + g_vparamPosXY[0] = cgCreateParameter(g_cgcontext, CG_FLOAT4); + g_vparamPosXY[1] = cgCreateParameter(g_cgcontext, CG_FLOAT4); + + + ZZLog::Debug_Log("Creating effects."); + B_G(LoadEffects(), return false); + + // create a sample shader + clampInfo temp; + memset(&temp, 0, sizeof(temp)); + temp.wms = 3; temp.wmt = 3; + + g_nPixelShaderVer = 0;//SHADER_ACCURATE; + // test + bool bFailed; + FRAGMENTSHADER* pfrag = LoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed); + if( bFailed || pfrag == NULL ) { + g_nPixelShaderVer = SHADER_ACCURATE|SHADER_REDUCED; + + pfrag = LoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed); + if( pfrag != NULL ) + cgGLLoadProgram(pfrag->prog); + if( bFailed || pfrag == NULL || cgGetError() != CG_NO_ERROR ) { + g_nPixelShaderVer = SHADER_REDUCED; + ZZLog::Error_Log("Basic shader test failed."); + } + } + + if (g_nPixelShaderVer & SHADER_REDUCED) + conf.bilinear = 0; + + ZZLog::Debug_Log("Creating extra effects."); + B_G(LoadExtraEffects(), return false); + + ZZLog::Debug_Log("using %s shaders.", g_pShaders[g_nPixelShaderVer]); + return true; +} + 
+// Disable CG +void ZZshGLDisableProfile() { + cgGLDisableProfile(cgvProf); + cgGLDisableProfile(cgfProf); +} +//Enable CG +void ZZshGLEnableProfile() { + cgGLEnableProfile(cgvProf); + cgGLEnableProfile(cgfProf); +} + // This is a helper of cgGLSetParameter4fv, made for debugging purposes. // The name could be any string. We must use it on compilation time, because the erronious handler does not // return it. -void ZZcgSetParameter4fv(CGparameter param, const float* v, const char* name) +void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name) { ShaderHandleName = name; cgGLSetParameter4fv(param, v); } + +// The same function for texture, also to cgGLEnable +void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name) { + ShaderHandleName = name; + cgGLSetTextureParameter(param, texobj); + cgGLEnableTextureParameter(param); +} + +// Used sometimes for color 1. +void ZZshDefaultOneColor( FRAGMENTSHADER ptr ) { + ShaderHandleName = "Set Default One color"; + Vector v = Vector ( 1, 1, 1, 1 ); + ZZshSetParameter4fv( ptr.sOneColor, v, "DefaultOne"); +} + +void ZZshSetVertexShader(ZZshShader prog) { + if ((prog) != g_vsprog) { + cgGLBindProgram(prog); + g_vsprog = prog; + } +} + +void ZZshSetPixelShader(ZZshShader prog) { + if ((prog) != g_psprog) { + cgGLBindProgram(prog); + g_psprog = prog; + } +} void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type) { @@ -126,9 +267,9 @@ void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type) static bool outdated_shaders = false; -void SetupVertexProgramParameters(CGprogram prog, int context) +void SetupVertexProgramParameters(ZZshProgram prog, int context) { - CGparameter p; + ZZshParameter p; p = cgGetNamedParameter(prog, "g_fPosXY"); diff --git a/plugins/zzogl-pg/opengl/ZZoglShaders.h b/plugins/zzogl-pg/opengl/ZZoglShaders.h new file mode 100644 index 0000000000..41b7511e51 --- /dev/null +++ b/plugins/zzogl-pg/opengl/ZZoglShaders.h @@ 
-0,0 +1,243 @@ +/* ZZ Open GL graphics plugin + * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com + * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __ZEROGS_SHADERS_H__ +#define __ZEROGS_SHADERS_H__ + +// -- Not very important things, but we keep it to enumerate shader +#define NUM_FILTERS 2 // texture filtering +#define NUM_TYPES 5 // types of texture read modes +#define NUM_TEXWRAPS 4 // texture wrapping +#define NUM_SHADERS (NUM_FILTERS*NUM_TYPES*NUM_TEXWRAPS*32) // # shaders for a given ps + +// Just bitmask for different type of shaders +#define SHADER_REDUCED 1 // equivalent to ps2.0 +#define SHADER_ACCURATE 2 // for older cards with less accurate math (ps2.x+) +// For output +const static char* g_pShaders[] = { "full", "reduced", "accurate", "accurate-reduced" }; + +#define NVIDIA_CG_API +// --------------------------- API abstraction level -------------------------------- + +#ifdef NVIDIA_CG_API // Code for NVIDIA cg-toolkit API + +#include <Cg/cg.h> +#include <Cg/cgGL.h> +#define ZZshProgram CGprogram +#define ZZshShader CGprogram +#define ZZshShaderLink CGprogram +#define ZZshParameter CGparameter +#define ZZshContext CGcontext +#define ZZshProfile CGprofile +#define ZZshError CGerror +#define pZero 0 // Zero parameter +#define 
sZero 0 // Zero program + +#define SAFE_RELEASE_PROG(x) { if( (x) != NULL ) { cgDestroyProgram(x); x = NULL; } } +inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); } + +#endif // end NVIDIA cg-toolkit API + +const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL }; +const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" }; + +enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC}; +// We have "compatible" shaders, as RegularFogVS and RegularFogPS, if we don't need to worry about incompatible shaders. +// It's used only in GLSL mode. + +// ------------------------- Variables ------------------------------- +extern int g_nPixelShaderVer; +extern ZZshShaderLink pvs[16], g_vsprog, g_psprog; +extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor; + +#define MAX_ACTIVE_UNIFORMS 600 +#define MAX_ACTIVE_SHADERS 400 + +struct FRAGMENTSHADER +{ + FRAGMENTSHADER() : prog(sZero), Shader(0), sMemory(pZero), sFinal(pZero), sBitwiseANDX(pZero), sBitwiseANDY(pZero), sInterlace(pZero), sCLUT(pZero), sOneColor(pZero), sBitBltZ(pZero), + fTexAlpha2(pZero), fTexOffset(pZero), fTexDims(pZero), fTexBlock(pZero), fClampExts(pZero), fTexWrapMode(pZero), + fRealTexDims(pZero), fTestBlack(pZero), fPageOffset(pZero), fTexAlpha(pZero) {} + + ZZshShaderLink prog; // it links to the FRAGMENTSHADER structure, for compatibility between GLSL and CG. + ZZshShader Shader; // GLSL store shaders not as ready programs, but as shader compiled objects. VS and PS should be linked together to + // make a program. + ZZshShaderType ShaderType; // Not every PS and VS are used together, only compatible ones. 
+ + ZZshParameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT; + ZZshParameter sOneColor, sBitBltZ, sInvTexDims; + ZZshParameter fTexAlpha2, fTexOffset, fTexDims, fTexBlock, fClampExts, fTexWrapMode, fRealTexDims, fTestBlack, fPageOffset, fTexAlpha; + + int ParametersStart, ParametersFinish; // this is part of UniformsIndex array in which parameters of this shader are stored. The last one is ParametersFinish-1 + +#ifdef _DEBUG + string filename; +#endif + + void set_uniform_param(ZZshParameter &var, const char *name) + { + ZZshParameter p; + p = cgGetNamedParameter(prog, name); + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p; + } + + bool set_texture(GLuint texobj, const char *name) + { + ZZshParameter p; + + p = cgGetNamedParameter(prog, name); + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) + { + cgGLSetTextureParameter(p, texobj); + cgGLEnableTextureParameter(p); + return true; + } + + return false; + } + + bool connect(ZZshParameter &tex, const char *name) + { + ZZshParameter p; + + p = cgGetNamedParameter(prog, name); + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) + { + cgConnectParameter(tex, p); + return true; + } + + return false; + } + + bool set_texture(ZZshParameter &tex, const char *name) + { + ZZshParameter p; + + p = cgGetNamedParameter(prog, name); + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) + { + //cgGLEnableTextureParameter(p); + tex = p; + return true; + } + + return false; + } + + bool set_shader_const(Vector v, const char *name) + { + ZZshParameter p; + + p = cgGetNamedParameter(prog, name); + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) + { + cgGLSetParameter4fv(p, v); + return true; + } + + return false; + } +}; + +struct VERTEXSHADER +{ + VERTEXSHADER() : prog(sZero), Shader(0), sBitBltPos(pZero), sBitBltTex(pZero) {} + + ZZshShaderLink prog; + ZZshShader Shader; + ZZshShaderType ShaderType; + + ZZshParameter sBitBltPos, sBitBltTex, fBitBltTrans; // 
vertex shader constants + + int ParametersStart, ParametersFinish; +}; + +namespace ZeroGS { + // Shaders variables + extern Vector g_vdepth; + extern Vector vlogz; + extern VERTEXSHADER pvsBitBlt; + extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; // ppsOne used to stop using shaders for draw + extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; + bool LoadEffects(); + bool LoadExtraEffects(); + FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); + + // only sets a limited amount of state (for Update) + void SetTexClamping(int context, FRAGMENTSHADER* pfragment); + void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force); +} + +// ------------------------- Variables ------------------------------- + +extern u8* s_lpShaderResources; +extern ZZshProfile cgvProf, cgfProf; +extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS]; +extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2]; + +// ------------------------- Functions ------------------------------- + +#ifdef NVIDIA_CG_API +inline bool ZZshExistProgram(FRAGMENTSHADER* pf) {return (pf->prog != NULL); }; // We don't check ps != NULL, so be warned, +inline bool ZZshExistProgram(VERTEXSHADER* pf) {return (pf->prog != NULL); }; +inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog != NULL); }; +#endif + +extern const char* ShaderCallerName; +extern const char* ShaderHandleName; + +inline void SetShaderCaller(const char* Name) { + ShaderCallerName = Name; +} + +inline void SetHandleName(const char* Name) { + ShaderHandleName = Name; +} + +inline void ResetShaderCounters() { +// g_vsprog = g_psprog = sZero; +} + +extern bool ZZshCheckProfilesSupport(); +extern bool ZZshStartUsingShaders(); +extern void ZZshGLDisableProfile(); +extern void ZZshGLEnableProfile(); +extern void 
ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name); +extern void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name); +extern void ZZshDefaultOneColor( FRAGMENTSHADER ptr ); +extern void ZZshSetVertexShader(ZZshShader prog); +extern void ZZshSetPixelShader(ZZshShader prog); + +inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps) +{ + return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps); +} + +struct SHADERHEADER +{ + unsigned int index, offset, size; // if highest bit of index is set, pixel shader +}; + +#endif diff --git a/plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders.h b/plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders.h index 23db367250..37784436b1 100644 --- a/plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders.h +++ b/plugins/zzogl-pg/opengl/ZeroGSShaders/zerogsshaders.h @@ -40,7 +40,7 @@ static __forceinline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps); } -extern CGcontext g_cgcontext; +extern ZZshContext g_cgcontext; static __forceinline CGprogram LoadShaderFromType(const char* srcdir, const char* srcfile, int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int ps, int context) { @@ -63,7 +63,7 @@ static __forceinline CGprogram LoadShaderFromType(const char* srcdir, const char if( ps & SHADER_ACCURATE ) macros.push_back("-DACCURATE_DECOMPRESSION"); macros.push_back(NULL); - CGprogram prog = cgCreateProgramFromFile(g_cgcontext, CG_SOURCE, srcfile, CG_PROFILE_ARBFP1, str, &macros[0]); + ZZshProgram prog = cgCreateProgramFromFile(g_cgcontext, CG_SOURCE, srcfile, CG_PROFILE_ARBFP1, str, &macros[0]); if(
!cgIsProgram(prog) ) { printf("Failed to load shader %s: \n%s\n", str, cgGetLastListing(g_cgcontext)); return NULL; diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 151046f7f2..bca0577ebe 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -18,8 +18,6 @@ */ #include "GS.h" -#include -#include #include @@ -27,6 +25,7 @@ #include "x86.h" #include "zerogs.h" #include "targets.h" +#include "ZZoglShaders.h" #define RHA //#define RW @@ -128,7 +127,7 @@ inline Vector ZeroGS::CRenderTarget::DefaultBitBltPos() { Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), 0.5f / (float)RH(fbh)); v *= 1.0f / 32767.0f; - ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_sBitBltPos"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_sBitBltPos"); return v; } @@ -139,7 +138,7 @@ inline Vector ZeroGS::CRenderTarget::DefaultBitBltTex() // I really sure that -0.5 is correct, because OpenGL have no half-offset // issue, DirectX known for. 
Vector v = Vector(1, -1, 0.5f / (float)RW(fbw), -0.5f / (float)RH(fbh)); - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_sBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_sBitBltTex"); return v; } @@ -237,11 +236,11 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co v.y = vposxy.y; v.z = vposxy.z; v.w = vposxy.w - dy * 2.0f / (float)fbh; - ZZcgSetParameter4fv(g_vparamPosXY[context], v, "g_fPosXY"); + ZZshSetParameter4fv(g_vparamPosXY[context], v, "g_fPosXY"); } else { - ZZcgSetParameter4fv(g_vparamPosXY[context], vposxy, "g_fPosXY"); + ZZshSetParameter4fv(g_vparamPosXY[context], vposxy, "g_fPosXY"); } // set render states @@ -434,8 +433,7 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) if (nUpdateTarg) { - cgGLSetTextureParameter(ppsBaseTexture.sFinal, ittarg->second->ptex); - cgGLEnableTextureParameter(ppsBaseTexture.sFinal); + ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ittarg->second->ptex, "BaseTexture.final"); //assert( ittarg->second->fbw == fbw ); int offset = (fbp - ittarg->second->fbp) * 64 / fbw; @@ -448,17 +446,19 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) v.z = 0.25f; v.w = (float)RH(offset) + 0.25f; - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); // v = DefaultBitBltTex(); Maybe? - v = DefaultOneColor(ppsBaseTexture) ; + ZZshDefaultOneColor ( ppsBaseTexture ); - SETPIXELSHADER(ppsBaseTexture.prog); + ZZshSetPixelShader(ppsBaseTexture.prog); nUpdateTarg = 0; } else { + u32 bit_idx = (AA.x == 0) ? 
0 : 1; + // align the rect to the nearest page // note that fbp is always aligned on page boundaries tex0Info texframe; @@ -467,21 +467,20 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) texframe.tw = fbw; texframe.th = fbh; texframe.psm = psm; - CMemoryTarget* pmemtarg = g_MemTargs.GetMemoryTarget(texframe, 1); // write color and zero out stencil buf, always 0 context! // force bilinear if using AA // Fix in r133 -- FFX movies and Gust backgrounds! - SetTexVariablesInt(0, 0*(s_AAx || s_AAy) ? 2 : 0, texframe, pmemtarg, &ppsBitBlt[!!s_AAx], 1); - cgGLSetTextureParameter(ppsBitBlt[!!s_AAx].sMemory, pmemtarg->ptex->tex); - cgGLEnableTextureParameter(ppsBitBlt[!!s_AAx].sMemory); + //SetTexVariablesInt(0, 0*(AA.x || AA.y) ? 2 : 0, texframe, false, &ppsBitBlt[!!s_AAx], 1); + SetTexVariablesInt(0, 0, texframe, false, &ppsBitBlt[bit_idx], 1); + ZZshGLSetTextureParameter(ppsBitBlt[bit_idx].sMemory, vb[0].pmemtarg->ptex->tex, "BitBlt.memory"); v = Vector(1, 1, 0.0f, 0.0f); - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); v.x = 1; v.y = 2; - ZZcgSetParameter4fv(ppsBitBlt[!!s_AAx].sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(ppsBitBlt[bit_idx].sOneColor, v, "g_fOneColor"); assert(ptex != 0); @@ -496,11 +495,11 @@ void ZeroGS::CRenderTarget::Update(int context, ZeroGS::CRenderTarget* pdepth) } // render with an AA shader if possible (bilinearly interpolates data) - //cgGLLoadProgram(ppsBitBlt[!!s_AAx].prog); - SETPIXELSHADER(ppsBitBlt[!!s_AAx].prog); + //cgGLLoadProgram(ppsBitBlt[bit_idx].prog); + ZZshSetPixelShader(ppsBitBlt[bit_idx].prog); } - SETVERTEXSHADER(pvsBitBlt.prog); + ZZshSetVertexShader(pvsBitBlt.prog); DrawTriangleArray(); @@ -545,22 +544,22 @@ void ZeroGS::CRenderTarget::ConvertTo32() v.y = (float)RH(16); v.z = -(float)RW(fbw); v.w = (float)RH(8); - ZZcgSetParameter4fv(ppsConvert16to32.fTexOffset, v, "g_fTexOffset"); + 
ZZshSetParameter4fv(ppsConvert16to32.fTexOffset, v, "g_fTexOffset"); v.x = (float)RW(8); v.y = 0; v.z = 0; v.w = 0.25f; - ZZcgSetParameter4fv(ppsConvert16to32.fPageOffset, v, "g_fPageOffset"); + ZZshSetParameter4fv(ppsConvert16to32.fPageOffset, v, "g_fPageOffset"); v.x = (float)RW(2 * fbw); v.y = (float)RH(fbh); v.z = 0; v.w = 0.0001f * (float)RH(fbh); - ZZcgSetParameter4fv(ppsConvert16to32.fTexDims, v, "g_fTexDims"); + ZZshSetParameter4fv(ppsConvert16to32.fTexDims, v, "g_fTexDims"); // v.x = 0; -// ZZcgSetParameter4fv(ppsConvert16to32.fTexBlock, v, "g_fTexBlock"); +// ZZshSetParameter4fv(ppsConvert16to32.fTexBlock, v, "g_fTexBlock"); glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); @@ -569,10 +568,8 @@ void ZeroGS::CRenderTarget::ConvertTo32() FBTexture(0, ptexConv); ZeroGS::ResetRenderTarget(1); - BindToSample(&ptex) ; - - cgGLSetTextureParameter(ppsConvert16to32.sFinal, ptex); - cgGLEnableTextureParameter(ppsBitBlt[!!s_AAx].sMemory); + BindToSample(&ptex); + ZZshGLSetTextureParameter(ppsConvert16to32.sFinal, ptex, "Convert 16 to 32.Final"); fbh /= 2; // have 16 bit surfaces are usually 2x higher SetViewport(); @@ -580,9 +577,8 @@ void ZeroGS::CRenderTarget::ConvertTo32() if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); // render with an AA shader if possible (bilinearly interpolates data) - SETVERTEXSHADER(pvsBitBlt.prog); - - SETPIXELSHADER(ppsConvert16to32.prog); + ZZshSetVertexShader(pvsBitBlt.prog); + ZZshSetPixelShader(ppsConvert16to32.prog); DrawTriangleArray(); #ifdef _DEBUG @@ -600,7 +596,6 @@ void ZeroGS::CRenderTarget::ConvertTo32() // restore SAFE_RELEASE_TEX(ptex); - SAFE_RELEASE_TEX(ptexFeedback); ptex = ptexConv; @@ -609,7 +604,7 @@ void ZeroGS::CRenderTarget::ConvertTo32() if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); // reset textures - BindToSample(&ptex) ; + BindToSample(&ptex); glEnable(GL_SCISSOR_TEST); @@ -653,19 +648,19 @@ void ZeroGS::CRenderTarget::ConvertTo16() v.y = 8.0f / (float)fbh; v.z = 0.5f * 
v.x; v.w = 0.5f * v.y; - ZZcgSetParameter4fv(ppsConvert32to16.fTexOffset, v, "g_fTexOffset"); + ZZshSetParameter4fv(ppsConvert32to16.fTexOffset, v, "g_fTexOffset"); v.x = 256.0f / 255.0f; v.y = 256.0f / 255.0f; v.z = 0.05f / 256.0f; v.w = -0.001f / 256.0f; - ZZcgSetParameter4fv(ppsConvert32to16.fPageOffset, v, "g_fPageOffset"); + ZZshSetParameter4fv(ppsConvert32to16.fPageOffset, v, "g_fPageOffset"); v.x = (float)RW(fbw); v.y = (float)RH(2 * fbh); v.z = 0; v.w = -0.1f / RH(fbh); - ZZcgSetParameter4fv(ppsConvert32to16.fTexDims, v, "g_fTexDims"); + ZZshSetParameter4fv(ppsConvert32to16.fTexDims, v, "g_fTexDims"); glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); @@ -675,10 +670,9 @@ void ZeroGS::CRenderTarget::ConvertTo16() ZeroGS::ResetRenderTarget(1); GL_REPORT_ERRORD(); - BindToSample(&ptex) ; + BindToSample(&ptex); - cgGLSetTextureParameter(ppsConvert32to16.sFinal, ptex); - cgGLEnableTextureParameter(ppsConvert32to16.sFinal); + ZZshGLSetTextureParameter(ppsConvert32to16.sFinal, ptex, "Convert 32 to 16"); // fbh *= 2; // have 16 bit surfaces are usually 2x higher @@ -687,9 +681,8 @@ void ZeroGS::CRenderTarget::ConvertTo16() if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); // render with an AA shader if possible (bilinearly interpolates data) - SETVERTEXSHADER(pvsBitBlt.prog); - - SETPIXELSHADER(ppsConvert32to16.prog); + ZZshSetVertexShader(pvsBitBlt.prog); + ZZshSetPixelShader(ppsConvert32to16.prog); DrawTriangleArray(); #ifdef _DEBUG @@ -702,7 +695,6 @@ void ZeroGS::CRenderTarget::ConvertTo16() #endif vposxy.y = -2.0f * (32767.0f / 8.0f) / (float)fbh; - vposxy.w = 1 + 0.5f / fbh; // restore @@ -759,11 +751,11 @@ void ZeroGS::CRenderTarget::_CreateFeedback() // tex coords, test ffx bikanel island when changing these /* Vector v = DefaultBitBltPos(); v = Vector ((float)(RW(fbw+4)), (float)(RH(fbh+4)), +0.25f, -0.25f); - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBltTex");*/ + ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "BitBltTex");*/ // 
tex coords, test ffx bikanel island when changing these -// Vector v = Vector(1, -1, 0.5f / (fbw<ptex->tex); - cgGLEnableTextureParameter(ppsBaseTexture.sFinal); - + SetTexVariablesInt(0, 0, texframe, false, &ppsBitBltDepth, 1); + ZZshGLSetTextureParameter(ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth"); + Vector v = DefaultBitBltPos(); v = DefaultBitBltTex(); @@ -999,7 +987,7 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr) v.y = 2; v.z = PSMT_IS16Z(psm) ? 1.0f : 0.0f; v.w = g_filog32; - ZZcgSetParameter4fv(ppsBitBltDepth.sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(ppsBitBltDepth.sOneColor, v, "g_fOneColor"); Vector vdepth = g_vdepth; @@ -1014,7 +1002,7 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr) assert(ppsBitBltDepth.sBitBltZ != 0); - ZZcgSetParameter4fv(ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ"); + ZZshSetParameter4fv(ppsBitBltDepth.sBitBltZ, ((255.0f / 256.0f)*vdepth), "g_fBitBltZ"); assert(pdepth != 0); //GLint w1 = 0; @@ -1039,8 +1027,8 @@ void ZeroGS::CDepthTarget::Update(int context, ZeroGS::CRenderTarget* prndr) glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); - SETVERTEXSHADER(pvsBitBlt.prog); - SETPIXELSHADER(ppsBitBltDepth.prog); + ZZshSetVertexShader(pvsBitBlt.prog); + ZZshSetPixelShader(ppsBitBltDepth.prog); DrawTriangleArray(); @@ -1892,7 +1880,7 @@ static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* #define TARGET_THRESH 0x500 -extern int g_MaxTexWidth, g_MaxTexHeight; +extern int g_MaxTexWidth, g_MaxTexHeight; // Maximum height & width of supported texture. 
//#define SORT_TARGETS inline list::iterator ZeroGS::CMemoryTargetMngr::DestroyTargetIter(list::iterator& it) @@ -2057,29 +2045,6 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_SearchExistTarget return NULL; } -static __forceinline int NumberOfChannels(int psm) -{ - int channels = 1; - - if (PSMT_ISCLUT(psm)) - { - if (psm == PSMT8) - channels = 4; - else if (psm == PSMT4) - channels = 8; - } - else - { - if (PSMT_IS16BIT(psm)) - { - // 16z needs to be a8r8g8b8 - channels = 2; - } - } - - return channels; -} - ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height) { CMemoryTarget* targ = NULL; @@ -2093,9 +2058,7 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::MemoryTarget_ClearedTargetsSea if ((height <= itbest->realheight) && (itbest->fmt == fmt) && (itbest->widthmult == widthmult) && (itbest->channels == channels)) { // check channels - int targchannels = NumberOfChannels(itbest->psm); - - if (targchannels == channels) break; + if (PIXELS_PER_WORD(itbest->psm) == channels) break; } ++itbest; @@ -2140,12 +2103,14 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info u32 fmt = GL_UNSIGNED_BYTE; + // RGBA16 storage format if (PSMT_ISHALF_STORAGE(tex0)) fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV; int widthmult = 1, channels = 1; + // If our texture is too big and could not be placed in 1 GPU texture. Pretty rare. 
if ((g_MaxTexHeight < 4096) && (end - start > g_MaxTexHeight)) widthmult = 2; - channels = NumberOfChannels(tex0.psm); + channels = PIXELS_PER_WORD(tex0.psm); targ = MemoryTarget_ClearedTargetsSearch(fmt, widthmult, channels, end - start); @@ -3122,11 +3087,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMCT24: - if (s_AAy) + if (AA.y) { - RESOLVE_32BIT(32, u32, u32, 32A4, 8, 8, (u32), Frame, s_AAx, s_AAy); + RESOLVE_32BIT(32, u32, u32, 32A4, 8, 8, (u32), Frame, AA.x, AA.y); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(32, u32, u32, 32A2, 8, 8, (u32), Frame, 1, 0); } @@ -3139,11 +3104,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMCT16: - if (s_AAy) + if (AA.y) { - RESOLVE_32BIT(16, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, s_AAx, s_AAy); + RESOLVE_32BIT(16, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, AA.x, AA.y); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16, u16, u32, 16A2, 16, 8, RGBA32to16, Frame, 1, 0); } @@ -3156,11 +3121,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMCT16S: - if (s_AAy) + if (AA.y) { - RESOLVE_32BIT(16S, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, s_AAx, s_AAy); + RESOLVE_32BIT(16S, u16, u32, 16A4, 16, 8, RGBA32to16, Frame, AA.x, AA.y); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16S, u16, u32, 16A2, 16, 8, RGBA32to16, Frame, 1, 0); } @@ -3175,11 +3140,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMT24Z: - if (s_AAy) + if (AA.y) { - RESOLVE_32BIT(32Z, u32, u32, 32A4, 8, 8, (u32), Frame, s_AAx, s_AAy); + RESOLVE_32BIT(32Z, u32, u32, 32A4, 8, 8, (u32), Frame, AA.x, AA.y); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(32Z, u32, u32, 32A2, 8, 8, (u32), Frame, 1, 0); } @@ -3192,11 +3157,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMT16Z: - if (s_AAy) + if (AA.y) { - 
RESOLVE_32BIT(16Z, u16, u32, 16A4, 16, 8, (u16), Frame, s_AAx, s_AAy); + RESOLVE_32BIT(16Z, u16, u32, 16A4, 16, 8, (u16), Frame, AA.x, AA.y); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16Z, u16, u32, 16A2, 16, 8, (u16), Frame, 1, 0); } @@ -3209,11 +3174,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMT16SZ: - if (s_AAy) + if (AA.y) { - RESOLVE_32BIT(16SZ, u16, u32, 16A4, 16, 8, (u16), Frame, s_AAx, s_AAy); + RESOLVE_32BIT(16SZ, u16, u32, 16A4, 16, 8, (u16), Frame, AA.x, AA.y); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16SZ, u16, u32, 16A2, 16, 8, (u16), Frame, 1, 0); } @@ -3234,11 +3199,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMCT24: - if (s_AAy) + if (AA.y) { RESOLVE_32BIT(32, u32, Vector_16F, 32A4, 8, 8, Float16ToARGB, Frame16, 1, 1); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(32, u32, Vector_16F, 32A2, 8, 8, Float16ToARGB, Frame16, 1, 0); } @@ -3251,11 +3216,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMCT16: - if (s_AAy) + if (AA.y) { RESOLVE_32BIT(16, u16, Vector_16F, 16A4, 16, 8, Float16ToARGB16, Frame16, 1, 1); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16, u16, Vector_16F, 16A2, 16, 8, Float16ToARGB16, Frame16, 1, 0); } @@ -3268,11 +3233,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMCT16S: - if (s_AAy) + if (AA.y) { RESOLVE_32BIT(16S, u16, Vector_16F, 16A4, 16, 8, Float16ToARGB16, Frame16, 1, 1); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16S, u16, Vector_16F, 16A2, 16, 8, Float16ToARGB16, Frame16, 1, 0); } @@ -3287,11 +3252,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMT24Z: - if (s_AAy) + if (AA.y) { RESOLVE_32BIT(32Z, u32, Vector_16F, 32ZA4, 8, 8, Float16ToARGB_Z, Frame16, 1, 1); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(32Z, u32, Vector_16F, 32ZA2, 
8, 8, Float16ToARGB_Z, Frame16, 1, 0); } @@ -3304,11 +3269,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMT16Z: - if (s_AAy) + if (AA.y) { RESOLVE_32BIT(16Z, u16, Vector_16F, 16ZA4, 16, 8, Float16ToARGB16_Z, Frame16, 1, 1); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16Z, u16, Vector_16F, 16ZA2, 16, 8, Float16ToARGB16_Z, Frame16, 1, 0); } @@ -3321,11 +3286,11 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo case PSMT16SZ: - if (s_AAy) + if (AA.y) { RESOLVE_32BIT(16SZ, u16, Vector_16F, 16ZA4, 16, 8, Float16ToARGB16_Z, Frame16, 1, 1); } - else if (s_AAx) + else if (AA.x) { RESOLVE_32BIT(16SZ, u16, Vector_16F, 16ZA2, 16, 8, Float16ToARGB16_Z, Frame16, 1, 0); } diff --git a/plugins/zzogl-pg/opengl/targets.h b/plugins/zzogl-pg/opengl/targets.h index 6a26facfc0..e9ecc8dafb 100644 --- a/plugins/zzogl-pg/opengl/targets.h +++ b/plugins/zzogl-pg/opengl/targets.h @@ -27,13 +27,6 @@ #define GL_TEXTURE_RECTANGLE GL_TEXTURE_RECTANGLE_NV #endif -inline Vector DefaultOneColor(FRAGMENTSHADER ptr) -{ - Vector v = Vector(1, 1, 1, 1); - cgGLSetParameter4fv(ptr.sOneColor, v); - return v ; -} - namespace ZeroGS { @@ -206,24 +199,19 @@ extern CRenderTargetMngr s_RTs, s_DepthRTs; extern CBitwiseTextureMngr s_BitwiseTextures; extern CMemoryTargetMngr g_MemTargs; -extern u8 s_AAx, s_AAy, s_AAz, s_AAw; +//extern u8 s_AAx, s_AAy; +extern Point AA; -// Real rendered width, depends on AA and AAneg. +// Real rendered width, depends on AA. inline int RW(int tbw) { - if (s_AAx >= s_AAz) - return (tbw << (s_AAx - s_AAz)); - else - return (tbw >> (s_AAz - s_AAx)); + return (tbw << AA.x); } -// Real rendered height, depends on AA and AAneg. +// Real rendered height, depends on AA. 
inline int RH(int tbh) { - if (s_AAy >= s_AAw) - return (tbh << (s_AAy - s_AAw)); - else - return (tbh >> (s_AAw - s_AAy)); + return (tbh << AA.y); } /* inline void CreateTargetsList(int start, int end, list& listTargs) { @@ -242,10 +230,6 @@ inline list CreateTargetsList(int start, int end) extern Vector g_vdepth; extern int icurctx; - -extern VERTEXSHADER pvsBitBlt; -extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; -extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; extern GLuint vboRect; // Unworking diff --git a/plugins/zzogl-pg/opengl/zerogs.cpp b/plugins/zzogl-pg/opengl/zerogs.cpp index a14fbf101e..f00e5bb48f 100644 --- a/plugins/zzogl-pg/opengl/zerogs.cpp +++ b/plugins/zzogl-pg/opengl/zerogs.cpp @@ -32,6 +32,7 @@ #include "zpipe.h" #include "targets.h" #include "GLWin.h" +#include "ZZoglShaders.h" //----------------------- Defines @@ -50,7 +51,7 @@ extern int g_nFrame, g_nRealFrame; //-------------------------- Variables primInfo *prim; -CGprogram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ +ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ inline u32 FtoDW(float f) { return (*((u32*)&f)); } @@ -81,7 +82,7 @@ PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL; ///////////////////// // graphics resources -CGparameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0; +ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0; bool s_bTexFlush = false; int s_nLastResolveReset = 0; @@ -104,10 +105,6 @@ GLenum GetRenderTargetFormat() { return GetRenderFormat() == RFT_byte8 ? 
4 : g_i // returns the first and last addresses aligned to a page that cover void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw); -// bool LoadEffects(); -// bool LoadExtraEffects(); -// FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); - int s_nNewWidth = -1, s_nNewHeight = -1; void ChangeDeviceSize(int nNewWidth, int nNewHeight); @@ -343,8 +340,7 @@ extern RasterFont* font_p; void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color) { FUNCLOG - cgGLDisableProfile(cgvProf); - cgGLDisableProfile(cgfProf); + ZZshGLDisableProfile(); Vector v; v.SetColor(color); @@ -352,8 +348,7 @@ void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color) //glColor3f(((color >> 16) & 0xff) / 255.0f, ((color >> 8) & 0xff)/ 255.0f, (color & 0xff) / 255.0f); font_p->printString(pstr, left * 2.0f / (float)nBackbufferWidth - 1, 1 - top * 2.0f / (float)nBackbufferHeight, 0); - cgGLEnableProfile(cgvProf); - cgGLEnableProfile(cgfProf); + ZZshGLEnableProfile(); } void ZeroGS::ChangeWindowSize(int nNewWidth, int nNewHeight) @@ -409,42 +404,10 @@ void ZeroGS::ChangeDeviceSize(int nNewWidth, int nNewHeight) assert(vb[0].pBufferData != NULL && vb[1].pBufferData != NULL); } - -void ZeroGS::SetNegAA(int mode) -{ - FUNCLOG - // need to flush all targets - s_RTs.ResolveAll(); - s_RTs.Destroy(); - s_DepthRTs.ResolveAll(); - s_DepthRTs.Destroy(); - - s_AAz = s_AAw = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing. 
- - if (mode > 0) - { - s_AAz = (mode + 1) / 2; // ( 1, 0 ) ; ( 1, 1 ) -- it's used as binary shift, so x << s_AAz, y << s_AAw - s_AAw = mode / 2; - } - - memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts)); - - s_nLastResolveReset = 0; - - vb[0].prndr = NULL; - vb[0].pdepth = NULL; - vb[0].bNeedFrameCheck = 1; - vb[0].bNeedZCheck = 1; - vb[1].prndr = NULL; - vb[1].pdepth = NULL; - vb[1].bNeedFrameCheck = 1; - vb[1].bNeedZCheck = 1; -} - void ZeroGS::SetAA(int mode) { FUNCLOG - float f; + float f = 1.0f; // need to flush all targets s_RTs.ResolveAll(); @@ -452,28 +415,28 @@ void ZeroGS::SetAA(int mode) s_DepthRTs.ResolveAll(); s_DepthRTs.Destroy(); - s_AAx = s_AAy = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing. - + AA.x = AA.y = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing. + if (mode > 0) { - s_AAx = (mode + 1) / 2; // ( 1, 0 ) ; ( 1, 1 ) ; ( 2, 1 ) ; ( 2, 2 ) -- it's used as binary shift, so x >> s_AAx, y >> s_AAy - s_AAy = mode / 2; + // ( 1, 0 ) ; ( 1, 1 ) ; ( 2, 1 ) ; ( 2, 2 ) + // it's used as a binary shift, so x >> AA.x, y >> AA.y + AA.x = (mode + 1) / 2; + AA.y = mode / 2; + f = 2.0f; } memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts)); - s_nLastResolveReset = 0; vb[0].prndr = NULL; vb[0].pdepth = NULL; - vb[0].bNeedFrameCheck = 1; - vb[0].bNeedZCheck = 1; vb[1].prndr = NULL; vb[1].pdepth = NULL; - vb[1].bNeedFrameCheck = 1; - vb[1].bNeedZCheck = 1; + + vb[0].bNeedFrameCheck = vb[0].bNeedZCheck = 1; + vb[1].bNeedFrameCheck = vb[1].bNeedZCheck = 1; - f = mode > 0 ? 
2.0f : 1.0f; glPointSize(f); } @@ -486,14 +449,6 @@ void ZeroGS::Prim() if (curvb.CheckPrim()) Flush(prim->ctxt); curvb.curprim._val = prim->_val; - - // flush the other pipe if sharing the same buffer -// if( vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp && vb[!prim->ctxt].nCount > 0 ) -// { -// assert( vb[prim->ctxt].nCount == 0 ); -// Flush(!prim->ctxt); -// } - curvb.curprim.prim = prim->prim; } @@ -537,25 +492,24 @@ void ZeroGS::RenderCustom(float fAlpha) // tex coords Vector v = Vector(1 / 32767.0f, 1 / 32767.0f, 0, 0); - ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos"); v.x = (float)nLogoWidth; v.y = (float)nLogoHeight; - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); + ZZshSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex"); v.x = v.y = v.z = v.w = fAlpha; - ZZcgSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor"); + ZZshSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor"); if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); // inside vhDCb[0]'s target area, so render that region only - cgGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo); - cgGLEnableTextureParameter(ppsBaseTexture.sFinal); + ZZshGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo, "Logo"); glBindBuffer(GL_ARRAY_BUFFER, vboRect); SET_STREAM(); - SETVERTEXSHADER(pvsBitBlt.prog); - SETPIXELSHADER(ppsBaseTexture.prog); + ZZshSetVertexShader(pvsBitBlt.prog); + ZZshSetPixelShader(ppsBaseTexture.prog); DrawTriangleArray(); // restore @@ -657,7 +611,7 @@ void ZeroGS::KickPoint() curvb.NotifyWrite(1); - int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex); + int last = gs.primNext(2); VertexGPU* p = curvb.pBufferData + curvb.nCount; SET_VERTEX(&p[0], last, curvb); @@ -682,8 +636,8 @@ void ZeroGS::KickLine() curvb.NotifyWrite(2); - int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex); - int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex); + int next = 
gs.primNext(); + int last = gs.primNext(2); VertexGPU* p = curvb.pBufferData + curvb.nCount; SET_VERTEX(&p[0], next, curvb); @@ -748,7 +702,7 @@ void ZeroGS::KickTriangleFan() // add 1 to skip the first vertex - if (gs.primIndex == gs.nTriFanVert) gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex); + if (gs.primIndex == gs.nTriFanVert) gs.primIndex = gs.primNext(); OUTPUT_VERT(p[0], 0); OUTPUT_VERT(p[1], 1); @@ -777,13 +731,12 @@ void ZeroGS::KickSprite() } curvb.NotifyWrite(6); - - int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex); - int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex); + int next = gs.primNext(); + int last = gs.primNext(2); // sprite is too small and AA shows lines (tek4, Mana Khemia) - gs.gsvertex[last].x += (4*s_AAx); - gs.gsvertex[last].y += (4*s_AAy); + gs.gsvertex[last].x += (4 * AA.x); + gs.gsvertex[last].y += (4 * AA.y); // might be bad sprite (KH dialog text) //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y ) @@ -832,11 +785,8 @@ void ZeroGS::SetFogColor(u32 fog) Vector v; // set it immediately -// v.x = (gs.fogcol & 0xff) / 255.0f; -// v.y = ((gs.fogcol >> 8) & 0xff) / 255.0f; -// v.z = ((gs.fogcol >> 16) & 0xff) / 255.0f; v.SetColor(gs.fogcol); - ZZcgSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor"); + ZZshSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor"); // } } @@ -851,7 +801,7 @@ void ZeroGS::SetFogColor(GIFRegFOGCOL* fog) v.x = fog->FCR / 255.0f; v.y = fog->FCG / 255.0f; v.z = fog->FCB / 255.0f; - ZZcgSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor"); + ZZshSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor"); } void ZeroGS::ExtWrite() diff --git a/plugins/zzogl-pg/opengl/zerogs.h b/plugins/zzogl-pg/opengl/zerogs.h index b1d6439132..f8fcd2366c 100644 --- a/plugins/zzogl-pg/opengl/zerogs.h +++ b/plugins/zzogl-pg/opengl/zerogs.h @@ -34,12 +34,9 @@ #include "GS.h" #include "CRC.h" #include "rasterfont.h" // simple font -#include 
"ZeroGSShaders/zerogsshaders.h" using namespace std; - - //------------------------ Constants ---------------------- #define VB_BUFFERSIZE 0x400 @@ -48,7 +45,6 @@ const float g_filog32 = 0.999f / (32.0f * logf(2.0f)); //------------------------ Inlines ------------------------- - // Calculate maximum height for target inline int get_maxheight(int fbp, int fbw, int psm) { @@ -62,29 +58,13 @@ inline int get_maxheight(int fbp, int fbw, int psm) return ret; } -// Does psm need Alpha test with alpha expansion? -inline int nNeedAlpha(u8 psm) -{ - return (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S); -} - -// Get color storage model psm, that is important on flush stage. -inline u8 GetTexCPSM(const tex0Info& tex) -{ - if (PSMT_ISCLUT(tex.psm)) - return tex.cpsm; - else - return tex.psm; -} - - // ------------------------ Variables ------------------------- + // all textures have this width -//#define GPU_TEXWIDTH 512 extern int GPU_TEXWIDTH; extern float g_fiGPU_TEXWIDTH; -#define MASKDIVISOR 0 -#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode +#define MASKDIVISOR 0 // Used to decrease the bitwise mask texture size if 1024 is too big +#define GPU_TEXMASKWIDTH (1024 >> MASKDIVISOR) // bitwise mask width for region repeat mode extern u32 ptexBilinearBlocks; @@ -423,15 +403,6 @@ union }; -// Return, if tcc, aem or psm mode told us, than Alpha test should be used -// if tcc == 0 than no alpha used, aem used for alpha expanding and I am not sure -// that it's correct, psm -- color mode, -inline bool -IsAlphaTestExpansion(VB& curvb) -{ - return (curvb.tex0.tcc && gs.texa.aem && nNeedAlpha(GetTexCPSM(curvb.tex0))); -} - // visible members extern DrawFn drawfn[8]; @@ -441,17 +412,6 @@ extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height extern vector g_vboBuffers; // VBOs for all drawing commands extern GLuint vboRect; extern int g_nCurVBOIndex; - -// Shaders variables -extern Vector g_vdepth; -extern Vector
vlogz; -extern VERTEXSHADER pvsBitBlt; -extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; -extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; -bool LoadEffects(); -bool LoadExtraEffects(); -FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); - extern RenderFormatType g_RenderFormatType; void AddMessage(const char* pstr, u32 ms = 5000); @@ -460,7 +420,6 @@ void ChangeWindowSize(int nNewWidth, int nNewHeight); void SetChangeDeviceSize(int nNewWidth, int nNewHeight); void ChangeDeviceSize(int nNewWidth, int nNewHeight); void SetAA(int mode); -void SetNegAA(int mode); void SetCRC(int crc); void ReloadEffects(); @@ -545,10 +504,6 @@ void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h void SetContextTarget(int context) ; void NeedFactor(int w); -// only sets a limited amount of state (for Update) -void SetTexClamping(int context, FRAGMENTSHADER* pfragment); -void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, ZeroGS::CMemoryTarget* pmemtarg, FRAGMENTSHADER* pfragment, int force); - void ResetAlphaVariables(); void StartCapture();