diff --git a/build.sh b/build.sh index ca1c11b69d..e409a5c185 100644 --- a/build.sh +++ b/build.sh @@ -14,9 +14,6 @@ export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --prefix `pwd #Debug / Devbuild version #export PCSX2OPTIONS="--enable-debug --enable-devbuild --enable-sse3 --prefix `pwd`" -#Optimized, but a devbuild - with memcpy_fast_ enabled. - BROKEN! -#export PCSX2OPTIONS="--enable-sse3 --enable-sse4 --enable-devbuild --enable-memcpyfast --prefix `pwd`" - #ZeroGS Normal mode export ZEROGSOPTIONS="--enable-sse2" diff --git a/fps2bios/Makefile b/fps2bios/Makefile index d591c80da7..b4f9102335 100644 --- a/fps2bios/Makefile +++ b/fps2bios/Makefile @@ -13,12 +13,10 @@ STRIP = strip OPTIMIZE = -O2 -fomit-frame-pointer -finline-functions -ffast-math CFLAGS = -Wall ${OPTIMIZE} -I. DIRS = kernel intro loader -FILES = RESET ROMDIR EXTINFO ROMVER IOPBOOT EELOAD \ +FILES = RESET ROMDIR ROMVER IOPBOOT EELOAD \ SYSMEM LOADCORE EXCEPMAN INTRMAN SSBUSC DMACMAN \ TIMRMAN SYSCLIB HEAPLIB THREADMAN VBLANK STDIO \ - SIFMAN SIFCMD SIO2MAN LOADER INTRO IOPBTCONF FP2BLOGO \ - IOMAN MODLOAD ROMDRV IGREETING REBOOT LOADFILE CDVDMAN \ - CDVDFSV SIFINIT FILEIO SECRMAN EESYNC + SIFMAN SIFCMD SIO2MAN LOADER INTRO IOPBTCONF FP2BLOGO ps2romgen_exe: ps2romgen.o ${CC} ${CFLAGS} ps2romgen.o -o build/ps2romgen_exe @@ -33,7 +31,6 @@ fps2bios: for i in $(DIRS); do \ (cd $$i; make; cd ..) \ done; - cp -f used/* build cp -f FP2BLOGO build cp -f IOPBTCONF build/ (cd build; \ diff --git a/fps2bios/romdir.c b/fps2bios/romdir.c index 193ee09d33..4e2ccf5d5d 100644 --- a/fps2bios/romdir.c +++ b/fps2bios/romdir.c @@ -43,6 +43,7 @@ int main(int argc, char *argv[]) { } for (i=1; i -#include "PsxCommon.h" +#include "IopCommon.h" #include "CDVDiso.h" static cdvdStruct cdvd; @@ -462,7 +462,7 @@ void cdvdReadKey(u8 arg0, u16 arg1, u32 arg2, u8* key) { // get main elf name GetPS2ElfName(str); - sprintf(exeName, "%c%c%c%c%c%c%c%c%c%c%c",str[8],str[9],str[10],str[11],str[12],str[13],str[14],str[15],str[16],str[17],str[18]); + sprintf(exeName, "%c%c%c%c%c%c%c%c%c%c%c",str[8],str[9],str[10],str[11],str[12],str[13],str[14],str[15],str[16],str[17],str[18]); DevCon::Notice("exeName = %s", params &str[8]); // convert the number characters to a real 32bit number diff --git a/pcsx2/CDVD.h b/pcsx2/CDVD.h index f793e737e1..9eec464375 100644 --- a/pcsx2/CDVD.h +++ b/pcsx2/CDVD.h @@ -19,7 +19,7 @@ #ifndef __CDVD_H__ #define __CDVD_H__ -#include "PsxCommon.h" +#include "IopCommon.h" struct cdvdRTC { u8 status; diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index ae6540f9ee..cb863ba6a4 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -144,79 +144,259 @@ void WriteTLB(int i) MapTLB(i); } +////////////////////////////////////////////////////////////////////////////////////////// +// Performance Counters Update Stuff! +// +// Note regarding updates of PERF and TIMR registers: never allow increment to be 0. +// That happens when a game loads the MFC0 twice in the same recompiled block (before the +// cpuRegs.cycles update), and can cause games to lock up since it's an unexpected result. +// +// PERF Overflow exceptions: The exception is raised when the MSB of the Performance +// Counter Register is set. I'm assuming the exception continues to re-raise until the +// app clears the bit manually (needs testing). +// +// PERF Events: +// * Event 0 on PCR 0 is unused (counter disable) +// * Event 16 is usable as a specific counter disable bit (since CTE affects both counters) +// * Events 17-31 are reserved (act as counter disable) +// +// Most event mode aren't supported, and issue a warning and do a standard instruction +// count. But only mode 1 (instruction counter) has been found to be used by games thus far. +// + +static __forceinline bool PERF_ShouldCountEvent( uint evt ) +{ + switch( evt ) + { + // This is a rough table of actions for various PCR modes. Some of these + // can be implemented more accurately later. Others (WBBs in particular) + // probably cannot without some severe complications. + + // left sides are PCR0 / right sides are PCR1 + + case 1: // cpu cycle counter. + case 2: // single/dual instruction issued + case 3: // Branch issued / Branch mispredicated + return true; + + case 4: // BTAC/TLB miss + case 5: // ITLB/DTLB miss + case 6: // Data/Instruction cache miss + return false; + + case 7: // Access to DTLB / WBB single request fail + case 8: // Non-blocking load / WBB burst request fail + case 9: + case 10: + return false; + + case 11: // CPU address bus busy / CPU data bus busy + return false; + + case 12: // Instruction completed + case 13: // non-delayslot instruction completed + case 14: // COP2/COP1 instruction complete + case 15: // Load/Store completed + return true; + } + + return false; +} + +// Diagnostics for event modes that we just ignore for now. Using these perf units could +// cause compat issues in some very odd/rare games, so if this msg comes up who knows, +// might save some debugging effort. :) +void COP0_DiagnosticPCCR() +{ + if( cpuRegs.PERF.n.pccr.b.Event0 >= 7 && cpuRegs.PERF.n.pccr.b.Event0 <= 10 ) + Console::Notice( "PERF/PCR0 Unsupported Update Event Mode = 0x%x", params cpuRegs.PERF.n.pccr.b.Event0 ); + + if( cpuRegs.PERF.n.pccr.b.Event1 >= 7 && cpuRegs.PERF.n.pccr.b.Event1 <= 10 ) + Console::Notice( "PERF/PCR1 Unsupported Update Event Mode = 0x%x", params cpuRegs.PERF.n.pccr.b.Event1 ); +} + +__forceinline void COP0_UpdatePCCR() +{ + if( cpuRegs.CP0.n.Status.b.ERL || !cpuRegs.PERF.n.pccr.b.CTE ) return; + + // TODO : Implement memory mode checks here (kernel/super/user) + // For now we just assume user mode. + + if( cpuRegs.PERF.n.pccr.b.U0 ) + { + // ---------------------------------- + // Update Performance Counter 0 + // ---------------------------------- + + if( PERF_ShouldCountEvent( cpuRegs.PERF.n.pccr.b.Event0 ) ) + { + u32 incr = cpuRegs.cycle - s_iLastPERFCycle[0]; + if( incr == 0 ) incr++; + + // use prev/XOR method for one-time exceptions (but likely less correct) + //u32 prev = cpuRegs.PERF.n.pcr0; + cpuRegs.PERF.n.pcr0 += incr; + s_iLastPERFCycle[0] = cpuRegs.cycle; + + //prev ^= (1UL<<31); // XOR is fun! + //if( (prev & cpuRegs.PERF.n.pcr0) & (1UL<<31) ) + if( cpuRegs.PERF.n.pcr0 & 0x80000000 ) + { + // TODO: Vector to the appropriate exception here. + // This code *should* be correct, but is untested (and other parts of the emu are + // not prepared to handle proper Level 2 exception vectors yet) + + /*if( delay_slot ) + { + cpuRegs.CP0.ErrorEPC = cpuRegs.pc - 4; + cpuRegs.CP0.Cause.BD2 = 1; + } + else + { + cpuRegs.CP0.ErrorEPC = cpuRegs.pc; + cpuRegs.CP0.Cause.BD2 = 0; + } + + if( cpuRegs.CP0.Status.DEV ) + { + // Bootstrap vector + cpuRegs.pc = 0xbfc00280; + } + else + { + cpuRegs.pc = 0x80000080; + } + cpuRegs.CP0.Status.ERL = 1; + cpuRegs.CP0.Cause.EXC2 = 2;*/ + } + } + } + + if( cpuRegs.PERF.n.pccr.b.U1 ) + { + // ---------------------------------- + // Update Performance Counter 1 + // ---------------------------------- + + if( PERF_ShouldCountEvent( cpuRegs.PERF.n.pccr.b.Event1 ) ) + { + u32 incr = cpuRegs.cycle - s_iLastPERFCycle[1]; + if( incr == 0 ) incr++; + + cpuRegs.PERF.n.pcr1 += incr; + s_iLastPERFCycle[1] = cpuRegs.cycle; + + if( cpuRegs.PERF.n.pcr1 & 0x80000000 ) + { + // See PCR0 comments for notes on exceptions + } + } + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +// + namespace R5900 { namespace Interpreter { namespace OpcodeImpl { namespace COP0 { -void MFC0() { - if (!_Rt_) return; - if (_Rd_ != 9) { COP0_LOG("%s\n", disR5900Current.getCString() ); } +void MFC0() +{ + // Note on _Rd_ Condition 9: CP0.Count should be updated even if _Rt_ is 0. + if( (_Rd_ != 9) && !_Rt_ ) return; + if(_Rd_ != 9) { COP0_LOG("%s\n", disR5900Current.getCString() ); } //if(bExecBIOS == FALSE && _Rd_ == 25) SysPrintf("MFC0 _Rd_ %x = %x\n", _Rd_, cpuRegs.CP0.r[_Rd_]); - switch (_Rd_) { - - case 12: cpuRegs.GPR.r[_Rt_].UD[0] = (s64)(cpuRegs.CP0.r[_Rd_] & 0xf0c79c1f); break; + switch (_Rd_) + { + case 12: + cpuRegs.GPR.r[_Rt_].SD[0] = (s32)(cpuRegs.CP0.r[_Rd_] & 0xf0c79c1f); + break; + case 25: - switch(_Imm_ & 0x3F){ - case 0: cpuRegs.GPR.r[_Rt_].UD[0] = (s64)cpuRegs.PERF.n.pccr; break; - case 1: - if((cpuRegs.PERF.n.pccr & 0x800003E0) == 0x80000020) { - cpuRegs.PERF.n.pcr0 += cpuRegs.cycle-s_iLastPERFCycle[0]; - s_iLastPERFCycle[0] = cpuRegs.cycle; - } - - cpuRegs.GPR.r[_Rt_].UD[0] = (s64)cpuRegs.PERF.n.pcr0; - break; - case 3: - if((cpuRegs.PERF.n.pccr & 0x800F8000) == 0x80008000) { - cpuRegs.PERF.n.pcr1 += cpuRegs.cycle-s_iLastPERFCycle[1]; - s_iLastPERFCycle[1] = cpuRegs.cycle; - } - cpuRegs.GPR.r[_Rt_].UD[0] = (s64)cpuRegs.PERF.n.pcr1; - break; + switch(_Imm_ & 0x3F) + { + case 0: // MFPS [LSB is clear] + cpuRegs.GPR.r[_Rt_].SD[0] = (s32)cpuRegs.PERF.n.pccr.val; + break; + + case 1: // MFPC [LSB is set] - read PCR0 + COP0_UpdatePCCR(); + cpuRegs.GPR.r[_Rt_].SD[0] = (s32)cpuRegs.PERF.n.pcr0; + break; + + case 3: // MFPC [LSB is set] - read PCR1 + COP0_UpdatePCCR(); + cpuRegs.GPR.r[_Rt_].SD[0] = (s32)cpuRegs.PERF.n.pcr1; + break; } /*SysPrintf("MFC0 PCCR = %x PCR0 = %x PCR1 = %x IMM= %x\n", cpuRegs.PERF.n.pccr, cpuRegs.PERF.n.pcr0, cpuRegs.PERF.n.pcr1, _Imm_ & 0x3F);*/ - break; + break; + case 24: - SysPrintf("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF); - break; + Console::WriteLn("MFC0 Breakpoint debug Registers code = %x", params cpuRegs.code & 0x3FF); + break; + case 9: - // update - cpuRegs.CP0.n.Count += cpuRegs.cycle-s_iLastCOP0Cycle; + { + u32 incr = cpuRegs.cycle-s_iLastCOP0Cycle; + if( incr == 0 ) incr++; + cpuRegs.CP0.n.Count += incr; s_iLastCOP0Cycle = cpuRegs.cycle; - default: cpuRegs.GPR.r[_Rt_].UD[0] = (s64)cpuRegs.CP0.r[_Rd_]; + if( !_Rt_ ) break; + } + + default: + cpuRegs.GPR.r[_Rt_].UD[0] = (s64)cpuRegs.CP0.r[_Rd_]; } } -void MTC0() { +void MTC0() +{ COP0_LOG("%s\n", disR5900Current.getCString()); //if(bExecBIOS == FALSE && _Rd_ == 25) SysPrintf("MTC0 _Rd_ %x = %x\n", _Rd_, cpuRegs.CP0.r[_Rd_]); - switch (_Rd_) { + switch (_Rd_) + { case 25: /*if(bExecBIOS == FALSE && _Rd_ == 25) SysPrintf("MTC0 PCCR = %x PCR0 = %x PCR1 = %x IMM= %x\n", cpuRegs.PERF.n.pccr, cpuRegs.PERF.n.pcr0, cpuRegs.PERF.n.pcr1, _Imm_ & 0x3F);*/ - switch(_Imm_ & 0x3F){ - case 0: - if((cpuRegs.PERF.n.pccr & 0x800003E0) == 0x80000020) - cpuRegs.PERF.n.pcr0 += cpuRegs.cycle-s_iLastPERFCycle[0]; - if((cpuRegs.PERF.n.pccr & 0x800F8000) == 0x80008000) - cpuRegs.PERF.n.pcr1 += cpuRegs.cycle-s_iLastPERFCycle[1]; - cpuRegs.PERF.n.pccr = cpuRegs.GPR.r[_Rt_].UL[0]; + switch(_Imm_ & 0x3F) + { + case 0: // MTPS [LSB is clear] + // Updates PCRs and sets the PCCR. + COP0_UpdatePCCR(); + cpuRegs.PERF.n.pccr.val = cpuRegs.GPR.r[_Rt_].UL[0]; + COP0_DiagnosticPCCR(); + break; + + case 1: // MTPC [LSB is set] - set PCR0 + cpuRegs.PERF.n.pcr0 = cpuRegs.GPR.r[_Rt_].UL[0]; s_iLastPERFCycle[0] = cpuRegs.cycle; + break; + + case 3: // MTPC [LSB is set] - set PCR0 + cpuRegs.PERF.n.pcr1 = cpuRegs.GPR.r[_Rt_].UL[0]; s_iLastPERFCycle[1] = cpuRegs.cycle; - break; - case 1: cpuRegs.PERF.n.pcr0 = cpuRegs.GPR.r[_Rt_].UL[0]; s_iLastPERFCycle[0] = cpuRegs.cycle; break; - case 3: cpuRegs.PERF.n.pcr1 = cpuRegs.GPR.r[_Rt_].UL[0]; s_iLastPERFCycle[1] = cpuRegs.cycle; break; + break; } - break; + break; + case 24: - SysPrintf("MTC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF); - break; + Console::WriteLn("MTC0 Breakpoint debug Registers code = %x", params cpuRegs.code & 0x3FF); + break; + case 12: WriteCP0Status(cpuRegs.GPR.r[_Rt_].UL[0]); break; - case 9: s_iLastCOP0Cycle = cpuRegs.cycle; cpuRegs.CP0.r[9] = cpuRegs.GPR.r[_Rt_].UL[0]; break; - default: cpuRegs.CP0.r[_Rd_] = cpuRegs.GPR.r[_Rt_].UL[0]; break; + case 9: + s_iLastCOP0Cycle = cpuRegs.cycle; + cpuRegs.CP0.r[9] = cpuRegs.GPR.r[_Rt_].UL[0]; + break; + + default: + cpuRegs.CP0.r[_Rd_] = cpuRegs.GPR.r[_Rt_].UL[0]; + break; } } @@ -233,12 +413,10 @@ int CPCOND0() { void BC0F() { BC0(== 0); - COP0_LOG( "COP0 > BC0F\n" ); } void BC0T() { BC0(== 1); - COP0_LOG( "COP0 > BC0T\n" ); } #define BC0L(cond) \ @@ -248,12 +426,10 @@ void BC0T() { void BC0FL() { BC0L(== 0); - COP0_LOG( "COP0 > BC0FL\n" ); } void BC0TL() { BC0L(== 1); - COP0_LOG( "COP0 > BCOTL\n" ); } void TLBR() { @@ -263,7 +439,6 @@ void TLBR() { int i = cpuRegs.CP0.n.Index&0x1f; - COP0_LOG("COP0 > TLBR\n"); cpuRegs.CP0.n.PageMask = tlb[i].PageMask; cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi&~(tlb[i].PageMask|0x1f00); cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0&~1)|((tlb[i].EntryHi>>12)&1); diff --git a/pcsx2/COP0.h b/pcsx2/COP0.h index 6fb2a200d0..308dc5646f 100644 --- a/pcsx2/COP0.h +++ b/pcsx2/COP0.h @@ -19,10 +19,14 @@ #ifndef __COP0_H__ #define __COP0_H__ -void WriteCP0Status(u32 value); -void UpdateCP0Status(); -void WriteTLB(int i); -void UnmapTLB(int i); -void MapTLB(int i); +extern void WriteCP0Status(u32 value); +extern void UpdateCP0Status(); +extern void WriteTLB(int i); +extern void UnmapTLB(int i); +extern void MapTLB(int i); + +extern void COP0_UpdatePCCR(); +extern void COP0_DiagnosticPCCR(); + #endif /* __COP0_H__ */ diff --git a/pcsx2/CdRom.cpp b/pcsx2/CdRom.cpp index d0a3ff09f6..6947ae044a 100644 --- a/pcsx2/CdRom.cpp +++ b/pcsx2/CdRom.cpp @@ -18,8 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" -#include "Common.h" +#include "IopCommon.h" //THIS ALL IS FOR THE CDROM REGISTERS HANDLING @@ -73,7 +72,6 @@ const char *CmdName[0x100]= { cdrStruct cdr; long LoadCdBios; -int cdOpenCase; u8 Test04[] = { 0 }; u8 Test05[] = { 0 }; diff --git a/pcsx2/CdRom.h b/pcsx2/CdRom.h index be33f9390c..10757dff7e 100644 --- a/pcsx2/CdRom.h +++ b/pcsx2/CdRom.h @@ -19,7 +19,7 @@ #ifndef __CDROM_H__ #define __CDROM_H__ -#include "PsxCommon.h" +#include "IopCommon.h" #include "Decode_XA.h" #include "PS2Edefs.h" diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 7737c6c612..1461bb83bd 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -27,20 +27,20 @@ //#define PSXCLK 186864000 /* 36.864 Mhz */ #define PS2CLK 294912000 //hz /* 294.912 mhz */ +#define PCSX2_VERSION "(beta)" #include "Plugins.h" -#include "Misc.h" #include "SaveState.h" #include "DebugTools/Debug.h" -#include "R5900.h" #include "Memory.h" -#include "Elfheader.h" #include "Hw.h" -// Moving this before one of the other includes causes compilation issues. -//#include "Misc.h" + +#include "R5900.h" +#include "Elfheader.h" #include "Patch.h" -#define PCSX2_VERSION "(beta)" +#include "System.h" +#include "Pcsx2Config.h" #endif /* __COMMON_H__ */ diff --git a/pcsx2/DebugTools/Debug.h b/pcsx2/DebugTools/Debug.h index cc7dac97e9..00ecf53926 100644 --- a/pcsx2/DebugTools/Debug.h +++ b/pcsx2/DebugTools/Debug.h @@ -20,7 +20,7 @@ #ifndef __DEBUG_H__ #define __DEBUG_H__ -#include "Misc.h" +#include "Pcsx2Config.h" extern FILE *emuLog; @@ -73,15 +73,47 @@ namespace R3000A #ifdef PCSX2_DEVBUILD -extern u32 varLog; +struct LogSources +{ + bool + R5900:1, // instructions and exception vectors for the R5900 (EE) + R3000A:1, // instructions and exception vectors for the R3000a (IOP) + + Memory:1, // memory accesses (loads and stores) + Hardware:1, + DMA:1, + Bios:1, + ELF:1, + VU0:1, + COP0:1, // TLB logs, PERF logs, Debug register logs + VIF:1, + SPR:1, // Scratchpad + GIF:1, + SIF:1, + IPU:1, + VUM:1, // VU memory access logs + RPC:1, + Counters:1, // EE's counters! + + IopMemory:1, + IopHardware:1, + IopBios:1, + IopDMA:1, + IopCnt:1, + Memcards:1, + Pad:1, + CDVD:1, + GPU:1, // PS1's GPU (currently unimplemented) + LogToConsole:1; +}; + +extern LogSources varLog; void SourceLog( u16 protocol, u8 source, u32 cpuPc, u32 cpuCycle, const char *fmt, ...); void __Log( const char* fmt, ... ); extern bool SrcLog_CPU( const char* fmt, ... ); extern bool SrcLog_COP0( const char* fmt, ... ); -extern bool SrcLog_FPU( const char* fmt, ... ); -extern bool SrcLog_MMI( const char* fmt, ... ); extern bool SrcLog_MEM( const char* fmt, ... ); extern bool SrcLog_HW( const char* fmt, ... ); @@ -108,42 +140,38 @@ extern bool SrcLog_PSXCNT( const char* fmt, ... ); extern bool SrcLog_MEMCARDS( const char* fmt, ... ); extern bool SrcLog_PAD( const char* fmt, ... ); -extern bool SrcLog_GTE( const char* fmt, ... ); extern bool SrcLog_CDR( const char* fmt, ... ); extern bool SrcLog_GPU( const char* fmt, ... ); -#define CPU_LOG (varLog & 0x00000001) && SrcLog_CPU -#define MEM_LOG (varLog & 0x00000002) && SrcLog_MEM -#define HW_LOG (varLog & 0x00000004) && SrcLog_HW -#define DMA_LOG (varLog & 0x00000008) && SrcLog_DMA -#define BIOS_LOG (varLog & 0x00000010) && SrcLog_BIOS -#define ELF_LOG (varLog & 0x00000020) && SrcLog_ELF -#define FPU_LOG (varLog & 0x00000040) && SrcLog_FPU -#define MMI_LOG (varLog & 0x00000080) && SrcLog_MMI -#define VU0_LOG (varLog & 0x00000100) && SrcLog_VU0 -#define COP0_LOG (varLog & 0x00000200) && SrcLog_COP0 -#define VIF_LOG (varLog & 0x00000400) && SrcLog_VIF -#define SPR_LOG (varLog & 0x00000800) && SrcLog_SPR -#define GIF_LOG (varLog & 0x00001000) && SrcLog_GIF -#define SIF_LOG (varLog & 0x00002000) && SrcLog_SIF -#define IPU_LOG (varLog & 0x00004000) && SrcLog_IPU -#define VUM_LOG (varLog & 0x00008000) && SrcLog_VUM -#define RPC_LOG (varLog & 0x00010000) && SrcLog_RPC -#define EECNT_LOG (varLog & 0x40000000) && SrcLog_EECNT +#define CPU_LOG (varLog.R5900) && SrcLog_CPU +#define MEM_LOG (varLog.Memory) && SrcLog_MEM +#define HW_LOG (varLog.Hardware) && SrcLog_HW +#define DMA_LOG (varLog.DMA) && SrcLog_DMA +#define BIOS_LOG (varLog.Bios) && SrcLog_BIOS +#define ELF_LOG (varLog.ELF) && SrcLog_ELF +#define VU0_LOG (varLog.VU0) && SrcLog_VU0 +#define COP0_LOG (varLog.COP0) && SrcLog_COP0 +#define VIF_LOG (varLog.VIF) && SrcLog_VIF +#define SPR_LOG (varLog.SPR) && SrcLog_SPR +#define GIF_LOG (varLog.GIF) && SrcLog_GIF +#define SIF_LOG (varLog.SIF) && SrcLog_SIF +#define IPU_LOG (varLog.IPU) && SrcLog_IPU +#define VUM_LOG (varLog.VUM) && SrcLog_VUM +#define RPC_LOG (varLog.RPC) && SrcLog_RPC +#define EECNT_LOG (varLog.Counters) && SrcLog_EECNT -#define PSXCPU_LOG (varLog & 0x00100000) && SrcLog_PSXCPU -#define PSXMEM_LOG (varLog & 0x00200000) && SrcLog_PSXMEM -#define PSXHW_LOG (varLog & 0x00400000) && SrcLog_PSXHW -#define PSXBIOS_LOG (varLog & 0x00800000) && SrcLog_PSXBIOS -#define PSXDMA_LOG (varLog & 0x01000000) && SrcLog_PSXDMA -#define PSXCNT_LOG (varLog & 0x20000000) && SrcLog_PSXCNT +#define PSXCPU_LOG (varLog.R3000A) && SrcLog_PSXCPU +#define PSXMEM_LOG (varLog.IopMemory) && SrcLog_PSXMEM +#define PSXHW_LOG (varLog.IopHardware) && SrcLog_PSXHW +#define PSXBIOS_LOG (varLog.IopBios) && SrcLog_PSXBIOS +#define PSXDMA_LOG (varLog.IopDMA) && SrcLog_PSXDMA +#define PSXCNT_LOG (varLog.IopCnt) && SrcLog_PSXCNT //memcard has the same number as PAD_LOG for now -#define MEMCARDS_LOG (varLog & 0x02000000) && SrcLog_MEMCARDS -#define PAD_LOG (varLog & 0x02000000) && SrcLog_PAD -#define GTE_LOG (varLog & 0x04000000) && SrcLog_GTE -#define CDR_LOG (varLog & 0x08000000) && SrcLog_CDR -#define GPU_LOG (varLog & 0x10000000) && SrcLog_GPU +#define MEMCARDS_LOG (varLog.Memcards) && SrcLog_MEMCARDS +#define PAD_LOG (varLog.Pad) && SrcLog_PAD +#define CDR_LOG (varLog.CDVD) && SrcLog_CDR +#define GPU_LOG (varLog.GPU) && SrcLog_GPU // fixme - currently we don't log cache #define CACHE_LOG 0&& diff --git a/pcsx2/Elfheader.cpp b/pcsx2/Elfheader.cpp index 020412431b..358a1cc672 100644 --- a/pcsx2/Elfheader.cpp +++ b/pcsx2/Elfheader.cpp @@ -148,7 +148,7 @@ static uint parseCommandLine( const char *filename ) { // 4 + 4 + 256 const char * p; int argc; - int i; + int i; args_ptr -= 256; @@ -159,59 +159,47 @@ static uint parseCommandLine( const char *filename ) p = strrchr( filename, '\\' ); #else //linux p = strrchr( filename, '/' ); - if( p == NULL ) - p = strchr(filename, '\\'); + if( p == NULL ) p = strchr(filename, '\\'); #endif - if ( p ) - { - p++; - } + if (p) + p++; else - { - p = filename; - } + p = filename; + args_ptr -= strlen( p ) + 1; - /* if ( args_ptr < 0 ) // fixme- This is still impossible. - { - return 0; - }*/ + strcpy( (char*)&PS2MEM_BASE[ args_ptr ], p ); //fill param 0; i.e. name of the program for ( i = strlen( p ) + 1 + 256, argc = 0; i > 0; i-- ) - { - while ( i && ( ( PS2MEM_BASE[ args_ptr + i ] == 0 ) || ( PS2MEM_BASE[ args_ptr + i ] == 32 ) ) ) - { - i--; - } - if ( PS2MEM_BASE[ args_ptr + i + 1 ] == ' ' ) - { - PS2MEM_BASE[ args_ptr + i + 1 ] = 0; - } - while ( i && ( PS2MEM_BASE[ args_ptr + i ] != 0 ) && ( PS2MEM_BASE[ args_ptr + i] != 32 ) ) - { - i--; - } - if ( ( PS2MEM_BASE[ args_ptr + i ] != 0 ) && ( PS2MEM_BASE[ args_ptr + i ] != 32 ) ) + { + while (i && ((PS2MEM_BASE[ args_ptr + i ] == 0) || (PS2MEM_BASE[ args_ptr + i ] == 32))) + { i--; } + + if ( PS2MEM_BASE[ args_ptr + i + 1 ] == ' ') PS2MEM_BASE[ args_ptr + i + 1 ] = 0; + + while (i && (PS2MEM_BASE[ args_ptr + i ] != 0) && (PS2MEM_BASE[ args_ptr + i] != 32)) + { i--; } + + if ((PS2MEM_BASE[ args_ptr + i ] != 0) && (PS2MEM_BASE[ args_ptr + i ] != 32)) { //i==0 argc++; + if ( args_ptr - 4 - 4 - argc * 4 < 0 ) // fixme - Should this be cast to a signed int? - { - return 0; - } + return 0; + ((u32*)PS2MEM_BASE)[ args_ptr / 4 - argc ] = args_ptr + i; } - else - { + else + { if ( ( PS2MEM_BASE[ args_ptr + i + 1 ] != 0 ) && ( PS2MEM_BASE[ args_ptr + i + 1 ] != 32 ) ) { argc++; if ( args_ptr - 4 - 4 - argc * 4 < 0 ) // fixme - Should this be cast to a signed int? - { return 0; - } + ((u32*)PS2MEM_BASE)[ args_ptr / 4 - argc ] = args_ptr + i + 1; } - } + } } ((u32*)PS2MEM_BASE)[ args_ptr /4 - argc - 1 ] = argc; //how many args ((u32*)PS2MEM_BASE)[ args_ptr /4 - argc - 2 ] = ( argc > 0); //have args? //not used, cannot be filled at all @@ -311,10 +299,10 @@ struct ElfObject if ((strnicmp( filename.c_str(), "cdrom0:", strlen("cdromN:")) == 0) || (strnicmp( filename.c_str(), "cdrom1:", strlen("cdromN:")) == 0)) { - int fi; - fi = CDVDFS_open(filename.c_str() + strlen("cdromN:"), 1);//RDONLY - if (fi < 0) - throw Exception::FileNotFound( filename ); + int fi = CDVDFS_open(filename.c_str() + strlen("cdromN:"), 1);//RDONLY + + if (fi < 0) throw Exception::FileNotFound( filename ); + CDVDFS_lseek( fi, 0, SEEK_SET ); rsize = CDVDFS_read( fi, (char*)data.GetPtr(), data.GetSizeInBytes() ); CDVDFS_close( fi ); @@ -324,15 +312,14 @@ struct ElfObject FILE *f; f = fopen( filename.c_str(), "rb" ); - if( f == NULL ) - Exception::FileNotFound( filename ); + if( f == NULL ) Exception::FileNotFound( filename ); + fseek( f, 0, SEEK_SET ); rsize = fread( data.GetPtr(), 1, data.GetSizeInBytes(), f ); fclose( f ); } - if( rsize < data.GetSizeInBytes() ) - throw Exception::EndOfStream( filename ); + if( rsize < data.GetSizeInBytes() ) throw Exception::EndOfStream( filename ); } u32 GetCRC() const @@ -426,37 +413,14 @@ struct ElfObject switch ( secthead[ i ].sh_type ) { - default: - ELF_LOG("unknown %08x",secthead[i].sh_type); - break; - - case 0x0: - ELF_LOG("null"); - break; - - case 0x1: - ELF_LOG("progbits"); - break; - - case 0x2: - ELF_LOG("symtab"); - break; - - case 0x3: - ELF_LOG("strtab"); - break; - - case 0x4: - ELF_LOG("rela"); - break; - - case 0x8: - ELF_LOG("no bits"); - break; - - case 0x9: - ELF_LOG("rel"); - break; + case 0x0: ELF_LOG("null"); break; + case 0x1: ELF_LOG("progbits"); break; + case 0x2: ELF_LOG("symtab"); break; + case 0x3: ELF_LOG("strtab"); break; + case 0x4: ELF_LOG("rela"); break; + case 0x8: ELF_LOG("no bits"); break; + case 0x9: ELF_LOG("rel"); break; + default: ELF_LOG("unknown %08x",secthead[i].sh_type); break; } ELF_LOG("\n"); @@ -619,17 +583,6 @@ void LoadGameSpecificSettings() g_FFXHack = 0; switch(ElfCRC) { - // The code involving VUFIX_SIGNEDZERO & VUFIX_EXTRAFLAGS - // is no longer in pcsx2. - - //case 0x0c414549: // spacefisherman, missing gfx - // g_VUGameFixes |= VUFIX_SIGNEDZERO; - // break; - //case 0x4C9EE7DF: // crazy taxi (u) - //case 0xC9C145BF: // crazy taxi, missing gfx - // g_VUGameFixes |= VUFIX_EXTRAFLAGS; - // break; - case 0xb99379b7: // erementar gerad (discolored chars) g_VUGameFixes |= VUFIX_XGKICKDELAY2; // Tested - still needed - arcum42 break; diff --git a/pcsx2/FiFo.cpp b/pcsx2/FiFo.cpp index 006682c34b..952c6766d7 100644 --- a/pcsx2/FiFo.cpp +++ b/pcsx2/FiFo.cpp @@ -184,11 +184,9 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value) } else { - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); GSGIFTRANSFER3((u32*)value, 1); - FreezeMMXRegs(0); - FreezeXMMRegs(0); + FreezeRegs(0); } } diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index a7c3011cc1..bdcbace6d7 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -220,14 +220,12 @@ void GIFdma() // When MTGS is enabled, Gifchain calls WRITERING_DMA, which calls GSRINGBUF_DONECOPY, which freezes // the registers inside of the FreezeXMMRegs calls here and in the other two below.. // I'm not really sure that is intentional. --arcum42 - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); GIFchain(); - FreezeXMMRegs(0); // Theres a comment below that says not to unfreeze the xmm regs, so not sure about this. - FreezeMMXRegs(0); + FreezeRegs(0); // Theres a comment below that says not to unfreeze the xmm regs, so not sure about this. if((gspath3done == 1 || (gif->chcr & 0xc) == 0) && gif->qwc == 0){ - if(gif->qwc > 0) SysPrintf("Horray\n"); + if(gif->qwc > 0) SysPrintf("Hurray\n"); gspath3done = 0; gif->chcr &= ~0x100; //psHu32(GIF_MODE)&= ~0x4; @@ -248,11 +246,9 @@ void GIFdma() if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80 && (gif->chcr & 0xc) == 0) { SysPrintf("DMA Stall Control on GIF normal\n"); } - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); GIFchain(); //Transfers the data set by the switch - FreezeXMMRegs(0); - FreezeMMXRegs(0); + FreezeRegs(0); if(gif->qwc == 0 && (gif->chcr & 0xc) == 0) gspath3done = 1; return; } @@ -298,12 +294,9 @@ void GIFdma() return; } } - - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); GIFchain(); //Transfers the data set by the switch - FreezeXMMRegs(0); - FreezeMMXRegs(0); + FreezeRegs(0); if ((gif->chcr & 0x80) && ptag[0] >> 31) { //Check TIE bit of CHCR and IRQ bit of tag GIF_LOG("dmaIrq Set\n"); @@ -534,15 +527,13 @@ void mfifoGIFtransfer(int qwc) { gifmfifoirq = 1; } } - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); if (mfifoGIFchain() == -1) { SysPrintf("GIF dmaChain error size=%d, madr=%lx, tadr=%lx\n", gif->qwc, gif->madr, gif->tadr); gifstate = GIF_STATE_STALL; } - FreezeXMMRegs(0); - FreezeMMXRegs(0); + FreezeRegs(0); if(gif->qwc == 0 && gifstate == GIF_STATE_DONE) gifstate = GIF_STATE_STALL; CPU_INT(11,mfifocycles); diff --git a/pcsx2/HostGui.h b/pcsx2/HostGui.h index e191d87dce..3620d36cd8 100644 --- a/pcsx2/HostGui.h +++ b/pcsx2/HostGui.h @@ -19,28 +19,34 @@ #pragma once ////////////////////////////////////////////////////////////////////////////////////////// -// TestRun Parameters. +// Startup Parameters. -struct TESTRUNARGS +enum StartupMode { - u8 enabled; - u8 jpgcapture; - - uint frame; // if == 0, frame is unlimited (run until crash). - int numimages; - int curimage; - u32 autopad; // mask for auto buttons - bool efile; - int snapdone; - - const char* ptitle; - const char* pimagename; - const char* plogname; - const char* pgsdll, *pcdvddll, *pspudll; - const char* ppad1dll, *ppad2dll, *pdev9dll; + BootMode_Bios, + BootMode_Quick, + BootMode_Elf }; -extern TESTRUNARGS g_TestRun; +class StartupParams +{ +public: + // Name of the CDVD or ELF image to load. + // if NULL, the CDVD configured settings are used. + const char* ImageName; + + bool NoGui; + bool Enabled; + StartupMode BootMode; + + // Plugin overrides + const char* gsdll, *cdvddll, *spudll; + const char* pad1dll, *pad2dll, *dev9dll; + + StartupParams() { memzero_obj(*this); } +}; + +extern StartupParams g_Startup; ////////////////////////////////////////////////////////////////////////////////////////// // Core Gui APIs (shared by all platforms) diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 48896cbeef..312ff8b845 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -58,7 +58,7 @@ enum BCb_COEFF = 0x40 }; -static PCSX2_ALIGNED16(const SSE2_Tables sse2_tables) = +static volatile PCSX2_ALIGNED16(const SSE2_Tables sse2_tables) = { {0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}, // c_bias {16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}, // y_bias @@ -223,8 +223,8 @@ ihatemsvc: // Use ecx and edx as base pointers, to allow for Mod/RM form on memOps. // This saves 2-3 bytes per instruction where these are used. :) - "mov ecx, offset yuv2rgb_temp\n" - "mov edx, offset sse2_tables+64\n" + "mov ecx, offset %c[yuv2rgb_temp]\n" + "mov edx, offset %c[sse2_tables]+64\n" ".align 16\n" "tworows:\n" @@ -240,15 +240,15 @@ ihatemsvc: // unfortunately I don't think this will matter despite being // technically potentially a little faster, but this is // equivalent to an add or sub - "pxor xmm2, xmmword ptr [edx-0x40]\n" // xmm2 <-- 8 x (Cb - 128) << 8 - "pxor xmm0, xmmword ptr [edx-0x40]\n" // xmm0 <-- 8 x (Cr - 128) << 8 + "pxor xmm2, xmmword ptr [edx+%c[C_BIAS]]\n" // xmm2 <-- 8 x (Cb - 128) << 8 + "pxor xmm0, xmmword ptr [edx+%c[C_BIAS]]\n" // xmm0 <-- 8 x (Cr - 128) << 8 "movaps xmm1, xmm0\n" "movaps xmm3, xmm2\n" - "pmulhw xmm1, xmmword ptr [edx+0x10]\n" - "pmulhw xmm3, xmmword ptr [edx+0x20]\n" - "pmulhw xmm0, xmmword ptr [edx+0x30]\n" - "pmulhw xmm2, xmmword ptr [edx+0x40]\n" + "pmulhw xmm1, xmmword ptr [edx+%c[GCr_COEFF]]\n" + "pmulhw xmm3, xmmword ptr [edx+%c[GCb_COEFF]]\n" + "pmulhw xmm0, xmmword ptr [edx+%c[RCr_COEFF]]\n" + "pmulhw xmm2, xmmword ptr [edx+%c[BCb_COEFF]]\n" "paddsw xmm1, xmm3\n" // store for the next line; looking at the code above // compared to the code below, I have to wonder whether @@ -270,13 +270,13 @@ ihatemsvc: "movaps xmm5, xmm2\n" "movaps xmm6, xmmword ptr [mb8+edi]\n" - "psubusb xmm6, xmmword ptr [edx-0x30]\n" + "psubusb xmm6, xmmword ptr [edx+%c[Y_BIAS]]\n" "movaps xmm7, xmm6\n" "psllw xmm6, 8\n" // xmm6 <- Y << 8 for pixels 0,2,4,6,8,10,12,14 - "pand xmm7, xmmword ptr [edx+Y_MASK]\n" // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15 + "pand xmm7, xmmword ptr [edx+%c[Y_MASK]]\n" // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15 - "pmulhuw xmm6, xmmword ptr [edx+0x00]\n" - "pmulhuw xmm7, xmmword ptr [edx+0x00]\n" + "pmulhuw xmm6, xmmword ptr [edx+%c[Y_COEFF]]\n" + "pmulhuw xmm7, xmmword ptr [edx+%c[Y_COEFF]]\n" "paddsw xmm0, xmm6\n" "paddsw xmm3, xmm7\n" @@ -286,7 +286,7 @@ ihatemsvc: "paddsw xmm5, xmm7\n" // round - "movaps xmm6, xmmword ptr [edx-0x10]\n" + "movaps xmm6, xmmword ptr [edx+%c[ROUND_1BIT]]\n" "paddw xmm0, xmm6\n" "paddw xmm1, xmm6\n" "paddw xmm2, xmm6\n" @@ -342,6 +342,12 @@ ihatemsvc: "cmp esi, 64\n" "jne tworows\n" ".att_syntax\n" + : + :[C_BIAS]"i"(C_BIAS), [Y_BIAS]"i"(Y_BIAS), [Y_MASK]"i"(Y_MASK), + [ROUND_1BIT]"i"(ROUND_1BIT), [Y_COEFF]"i"(Y_COEFF), [GCr_COEFF]"i"(GCr_COEFF), + [GCb_COEFF]"i"(GCb_COEFF), [RCr_COEFF]"i"(RCr_COEFF), [BCb_COEFF]"i"(BCb_COEFF), + [yuv2rgb_temp]"i"(yuv2rgb_temp), [sse2_tables]"i"(&sse2_tables) + : ); #else #error Unsupported compiler diff --git a/pcsx2/IopBios.cpp b/pcsx2/IopBios.cpp index bb84b6dc85..029b888cc7 100644 --- a/pcsx2/IopBios.cpp +++ b/pcsx2/IopBios.cpp @@ -19,7 +19,7 @@ #include "PrecompiledHeader.h" #include -#include "PsxCommon.h" +#include "IopCommon.h" namespace R3000A { diff --git a/pcsx2/PsxCommon.h b/pcsx2/IopCommon.h similarity index 77% rename from pcsx2/PsxCommon.h rename to pcsx2/IopCommon.h index 1e7683cdf8..27b8341f4d 100644 --- a/pcsx2/PsxCommon.h +++ b/pcsx2/IopCommon.h @@ -15,37 +15,25 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef __PSXCOMMON_H__ -#define __PSXCOMMON_H__ - -#include "PS2Etypes.h" - -#include - -#include "System.h" - -extern long LoadCdBios; -extern int cdOpenCase; +#ifndef __IOPCOMMON_H__ +#define __IOPCOMMON_H__ #define PSXCLK (36864000ULL) /* 36.864 Mhz */ -#include "Plugins.h" -#include "Misc.h" -#include "SaveState.h" - #include "R3000A.h" +#include "Common.h" + +#include "CdRom.h" +#include "CDVD.h" + +#include "Sio.h" +#include "Sif.h" + #include "IopMem.h" #include "IopHw.h" #include "IopBios.h" #include "IopDma.h" #include "IopCounters.h" -#include "CdRom.h" -#include "Sio.h" -#include "DebugTools/Debug.h" #include "IopSio2.h" -#include "CDVD.h" -#include "Memory.h" -#include "Hw.h" -#include "Sif.h" -#endif /* __PSXCOMMON_H__ */ +#endif /* __IOPCOMMON_H__ */ diff --git a/pcsx2/IopCounters.cpp b/pcsx2/IopCounters.cpp index 798af8d94f..10027f1ed5 100644 --- a/pcsx2/IopCounters.cpp +++ b/pcsx2/IopCounters.cpp @@ -23,7 +23,7 @@ #include "PrecompiledHeader.h" #include -#include "PsxCommon.h" +#include "IopCommon.h" /* Config.PsxType == 1: PAL: VBlank interlaced 50.00 Hz diff --git a/pcsx2/IopDma.cpp b/pcsx2/IopDma.cpp index 53995dfb1d..d90b247aaa 100644 --- a/pcsx2/IopDma.cpp +++ b/pcsx2/IopDma.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" +#include "IopCommon.h" using namespace R3000A; diff --git a/pcsx2/IopHw.cpp b/pcsx2/IopHw.cpp index 80ea92c464..073102ef86 100644 --- a/pcsx2/IopHw.cpp +++ b/pcsx2/IopHw.cpp @@ -18,8 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" -#include "Misc.h" +#include "IopCommon.h" #include "iR5900.h" // NOTE: Any modifications to read/write fns should also go into their const counterparts diff --git a/pcsx2/IopMem.cpp b/pcsx2/IopMem.cpp index 18d45cf737..0a4d04e582 100644 --- a/pcsx2/IopMem.cpp +++ b/pcsx2/IopMem.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "VU.h" #include "iCore.h" #include "Hw.h" diff --git a/pcsx2/IopSio2.cpp b/pcsx2/IopSio2.cpp index ebece56760..04e704e47a 100644 --- a/pcsx2/IopSio2.cpp +++ b/pcsx2/IopSio2.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" +#include "IopCommon.h" sio2Struct sio2; diff --git a/pcsx2/Linux/Linux.h b/pcsx2/Linux/Linux.h index af627516d8..3db9330c95 100644 --- a/pcsx2/Linux/Linux.h +++ b/pcsx2/Linux/Linux.h @@ -22,6 +22,7 @@ #include "PrecompiledHeader.h" #include "Paths.h" #include "Common.h" +#include "HostGui.h" //For CpuDlg #include "Counters.h" @@ -53,7 +54,6 @@ extern "C" extern void SaveConfig(); extern int LoadConfig(); extern void SysRestorableReset(); -extern bool UseGui; extern int Pcsx2Configure(); diff --git a/pcsx2/Linux/LnxConsole.cpp b/pcsx2/Linux/LnxConsole.cpp index 076caa864d..c399b9ec4b 100644 --- a/pcsx2/Linux/LnxConsole.cpp +++ b/pcsx2/Linux/LnxConsole.cpp @@ -94,7 +94,7 @@ bool Alert(const char* fmt) { GtkWidget *dialog; - if (!UseGui) + if (g_Startup.NoGui) { Console::Error(fmt); return false; @@ -126,7 +126,7 @@ bool Alert(const char* fmt, VARG_PARAM dummy, ...) if (msg[msg.length()-1] == '\n') msg[msg.length()-1] = 0; - if (!UseGui) + if (g_Startup.NoGui) { Console::Error(msg.c_str()); return false; diff --git a/pcsx2/Linux/LnxMain.cpp b/pcsx2/Linux/LnxMain.cpp index b195743e45..4f764cfac7 100644 --- a/pcsx2/Linux/LnxMain.cpp +++ b/pcsx2/Linux/LnxMain.cpp @@ -59,16 +59,11 @@ int main(int argc, char *argv[]) #endif #ifdef PCSX2_DEVBUILD - memset(&g_TestRun, 0, sizeof(g_TestRun)); + memset(&g_Startup, 0, sizeof(g_Startup)); #endif if (!ParseCommandLine(argc, argv, file)) return 0; -#ifdef PCSX2_DEVBUILD - g_TestRun.efile = efile; - g_TestRun.ptitle = file; -#endif - // make gtk thread safe if using MTGS if (CHECK_MULTIGS) { @@ -76,7 +71,7 @@ int main(int argc, char *argv[]) gdk_threads_init(); } - if (UseGui) + if (!g_Startup.NoGui) { gtk_init(NULL, NULL); } @@ -133,7 +128,7 @@ int main(int argc, char *argv[]) } #endif - if (UseGui && (file == NULL)) + if (!g_Startup.NoGui && (file == NULL)) { StartGui(); return 0; @@ -315,7 +310,7 @@ gboolean OnDelete(GtkWidget *widget, GdkEvent *event, gpointer user_data) int Pcsx2Configure() { - if (!UseGui) return 0; + if (g_Startup.NoGui) return 0; MainWindow = NULL; OnConf_Conf(NULL, 0); @@ -397,17 +392,17 @@ void pcsx2_exit() printf("PCSX2 Quitting\n"); - if (UseGui) + if (g_Startup.NoGui) + { + SysClose(); + exit(0); + } + else { gtk_main_quit(); SysClose(); gtk_exit(0); } - else - { - SysClose(); - exit(0); - } } void SignalExit(int sig) diff --git a/pcsx2/Linux/LnxSysExec.cpp b/pcsx2/Linux/LnxSysExec.cpp index 6bd70d48df..0080dff9bc 100644 --- a/pcsx2/Linux/LnxSysExec.cpp +++ b/pcsx2/Linux/LnxSysExec.cpp @@ -20,8 +20,6 @@ #include "LnxSysExec.h" #include "HostGui.h" -bool UseGui = true; - static bool sinit = false; GtkWidget *FileSel; @@ -68,6 +66,7 @@ void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * ) bool ParseCommandLine(int argc, char *argv[], char *file) { int i = 1; + g_Startup.BootMode = BootMode_Bios; while (i < argc) { @@ -78,84 +77,50 @@ bool ParseCommandLine(int argc, char *argv[], char *file) //Msgbox::Alert( phelpmsg ); return false; } - else if (stricmp(token, "-efile") == 0) - { - token = argv[i++]; - if (token != NULL) - { - efile = atoi(token); - } - } else if (stricmp(token, "-nogui") == 0) { - UseGui = FALSE; + g_Startup.NoGui = FALSE; } else if (stricmp(token, "-loadgs") == 0) { g_pRunGSState = argv[i++]; - } -#ifdef PCSX2_DEVBUILD - else if (stricmp(token, "-image") == 0) - { - g_TestRun.pimagename = argv[i++]; - } - else if (stricmp(token, "-log") == 0) - { - g_TestRun.plogname = argv[i++]; - } - else if (stricmp(token, "-logopt") == 0) + } + else if( strcmp(token, "-bootmode" ) == 0) { token = argv[i++]; - if (token != NULL) - { - if (token[0] == '0' && token[1] == 'x') token += 2; - sscanf(token, "%x", &varLog); - } - } - else if (stricmp(token, "-frame") == 0) - { - token = argv[i++]; - if (token != NULL) - { - g_TestRun.frame = atoi(token); - } - } - else if (stricmp(token, "-numimages") == 0) - { - token = argv[i++]; - if (token != NULL) - { - g_TestRun.numimages = atoi(token); - } - } - else if (stricmp(token, "-jpg") == 0) - { - g_TestRun.jpgcapture = 1; + g_Startup.BootMode = (StartupMode)atoi( token); + g_Startup.Enabled = true; } else if (stricmp(token, "-gs") == 0) { token = argv[i++]; - g_TestRun.pgsdll = token; + g_Startup.gsdll = token; } else if (stricmp(token, "-cdvd") == 0) { token = argv[i++]; - g_TestRun.pcdvddll = token; + g_Startup.cdvddll = token; } else if (stricmp(token, "-spu") == 0) { token = argv[i++]; - g_TestRun.pspudll = token; + g_Startup.spudll = token; } - else if (stricmp(token, "-test") == 0) - { - g_TestRun.enabled = 1; - } -#endif else if (stricmp(token, "-pad") == 0) { token = argv[i++]; - printf("-pad ignored\n"); + g_Startup.pad1dll = token; + g_Startup.pad2dll = token; + } + else if (stricmp(token, "-pad1") == 0) + { + token = argv[i++]; + g_Startup.pad1dll = token; + } + else if (stricmp(token, "-pad2") == 0) + { + token = argv[i++]; + g_Startup.pad2dll = token; } else if (stricmp(token, "-loadgs") == 0) { @@ -183,9 +148,33 @@ void SysPrintf(const char *fmt, ...) Console::Write(msg); } +static std::string str_Default( "default" ); + void RunGui() { + PCSX2_MEM_PROTECT_BEGIN(); + + LoadPatch( str_Default ); + if( g_Startup.NoGui || g_Startup.Enabled ) + { + // Initially bypass GUI and start PCSX2 directly. + // Manually load plugins using the user's configured image (if non-elf). + + if( g_Startup.Enabled && (g_Startup.BootMode != BootMode_Elf) ) + { + if (OpenPlugins(g_Startup.ImageName) == -1) + return; + } + + SysPrepareExecution( + (g_Startup.BootMode == BootMode_Elf) ? g_Startup.ImageName : NULL, + (g_Startup.BootMode == BootMode_Bios) + ); + } + StartGui(); + + PCSX2_MEM_PROTECT_END(); } void OnStates_Load(GtkMenuItem *menuitem, gpointer user_data) @@ -319,8 +308,6 @@ bool SysInit() mkdir(LOGS_DIR, 0755); #ifdef PCSX2_DEVBUILD - if (g_TestRun.plogname != NULL) - emuLog = fopen(g_TestRun.plogname, "w"); if (emuLog == NULL) emuLog = fopen(LOGS_DIR "/emuLog.txt", "wb"); #endif @@ -525,7 +512,7 @@ namespace HostGui #endif SysEndExecution(); - if (!UseGui) exit(0); + if (g_Startup.NoGui) exit(0); // fixme: The GUI is now capable of receiving control back from the // emulator. Which means that when we call SysEscapeExecute() here, the diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index 159163c9d8..485ba722d7 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -677,16 +677,14 @@ int mtgsThreadObject::Callback() void mtgsThreadObject::WaitGS() { // Freeze registers because some kernel code likes to destroy them - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); SetEvent(); while( volatize(m_RingPos) != volatize(m_WritePos) ) { Timeslice(); //SpinWait(); } - FreezeXMMRegs(0); - FreezeMMXRegs(0); + FreezeRegs(0); } // Sets the gsEvent flag and releases a timeslice. @@ -701,8 +699,7 @@ void mtgsThreadObject::SetEvent() void mtgsThreadObject::PrepEventWait() { // Freeze registers because some kernel code likes to destroy them - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); //Console::Notice( "MTGS Stall! EE waits for nothing! ... except your GPU sometimes." ); SetEvent(); Timeslice(); @@ -710,8 +707,7 @@ void mtgsThreadObject::PrepEventWait() void mtgsThreadObject::PostEventWait() const { - FreezeMMXRegs(0); - FreezeXMMRegs(0); + FreezeRegs(0); } u8* mtgsThreadObject::GetDataPacketPtr() const @@ -770,12 +766,10 @@ void mtgsThreadObject::SendDataPacket() m_CopyDataTally += m_packet_size; if( ( m_CopyDataTally > 0x8000 ) || ( ++m_CopyCommandTally > 16 ) ) { - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); //Console::Status( "MTGS Kick! DataSize : 0x%5.8x, CommandTally : %d", m_CopyDataTally, m_CopyCommandTally ); SetEvent(); - FreezeMMXRegs(0); - FreezeXMMRegs(0); + FreezeRegs(0); } } diff --git a/pcsx2/Makefile.am b/pcsx2/Makefile.am index 3dcb5723ec..4f6b70356e 100644 --- a/pcsx2/Makefile.am +++ b/pcsx2/Makefile.am @@ -18,7 +18,7 @@ RecoverySystem.cpp Saveslots.cpp libpcsx2_a_SOURCES += \ CDVD.h CDVDiso.h CDVDisodrv.h CDVDlib.h COP0.h Cache.h CdRom.h Common.h Counters.h Decode_XA.h EEregs.h \ Elfheader.h Exceptions.h GS.h Hw.h IopBios.h IopBios2.h IopCounters.h IopDma.h IopHw.h IopMem.h IopSio2.h Memcpyfast.h \ -Memory.h MemoryCard.h Misc.h Patch.h Paths.h Plugins.h PrecompiledHeader.h PsxCommon.h R3000A.h R5900.h R5900OpcodeTables.h \ +Memory.h MemoryCard.h Misc.h Patch.h Paths.h Plugins.h PrecompiledHeader.h IopCommon.h R3000A.h R5900.h R5900OpcodeTables.h \ SPR.h SamplProf.h SaveState.h Sif.h Sifcmd.h Sio.h SafeArray.h Stats.h StringUtils.h System.h Threading.h \ VU.h VUflags.h VUmicro.h VUops.h Vif.h VifDma.h cheatscpp.h vtlb.h NakedAsm.h R5900Exceptions.h HostGui.h Pcsx2Config.h diff --git a/pcsx2/Mdec.cpp b/pcsx2/Mdec.cpp index 3b16800591..839c84669b 100644 --- a/pcsx2/Mdec.cpp +++ b/pcsx2/Mdec.cpp @@ -21,8 +21,7 @@ #include #include -#include "Misc.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "Mdec.h" int iq_y[DCTSIZE2],iq_uv[DCTSIZE2]; diff --git a/pcsx2/MemcpyFast.h b/pcsx2/MemcpyFast.h index 844de2ff62..652170be88 100644 --- a/pcsx2/MemcpyFast.h +++ b/pcsx2/MemcpyFast.h @@ -19,46 +19,30 @@ #ifndef __MEMCPY_FAST_H__ #define __MEMCPY_FAST_H__ -void _memset16_unaligned( void* dest, u16 data, size_t size ); +#if defined(_WIN32) + #include "windows/memzero.h" +#else + #include "Linux/memzero.h" +#endif // WIN32 -#if defined(_WIN32) && !defined(__x86_64__) + // Only used in the Windows version of memzero.h. But it's in Misc.cpp for some reason. + void _memset16_unaligned( void* dest, u16 data, size_t size ); // The new simplified memcpy_amd_ is now faster than memcpy_raz_. // memcpy_amd_ also does mmx register saving, negating the need for freezeregs (code cleanup!) // Additionally, using one single memcpy implementation keeps the code cache cleaner. - //extern void __fastcall memcpy_raz_udst(void *dest, const void *src, size_t bytes); - //extern void __fastcall memcpy_raz_usrc(void *dest, const void *src, size_t bytes); - //extern void __fastcall memcpy_raz_(void *dest, const void *src, size_t bytes); +#ifdef __LINUX__ + extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes); + extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize); + extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize); +#else extern void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes); extern u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize); extern void memxor_mmx(void* dst, const void* src1, int cmpsize); +#endif -# include "windows/memzero.h" -# define memcpy_fast memcpy_amd_ -# define memcpy_aligned memcpy_amd_ - -#else - - // for now linux uses the GCC memcpy/memset implementations. - //#define memcpy_raz_udst memcpy - //#define memcpy_raz_usrc memcpy - //#define memcpy_raz_ memcpy - - // fast_routines.S - extern "C" u8 memcmp_mmx(const void* src1, const void* src2, int cmpsize); - extern "C" void memxor_mmx(void* dst, const void* src1, int cmpsize); - -# include "Linux/memzero.h" -#if defined(LINUX_USE_FAST_MEMORY) -# define memcpy_fast memcpy_amd_ -# define memcpy_aligned memcpy_amd_ - extern "C" void __fastcall memcpy_amd_(void *dest, const void *src, size_t bytes); -#else -# define memcpy_fast memcpy -# define memcpy_aligned memcpy -#endif // LINUX_USE_FAST_MEMORY - -#endif // WIN32 - + #define memcpy_fast memcpy_amd_ + #define memcpy_aligned memcpy_amd_ + #endif //Header diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 21098ad649..31c604598f 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -43,10 +43,9 @@ BIOS #include -#include "Common.h" +#include "IopCommon.h" #include "iR5900.h" -#include "PsxCommon.h" #include "VUmicro.h" #include "GS.h" #include "IPU/IPU.h" diff --git a/pcsx2/MemoryCard.cpp b/pcsx2/MemoryCard.cpp index 079309c0ec..b6805cdf3f 100644 --- a/pcsx2/MemoryCard.cpp +++ b/pcsx2/MemoryCard.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "MemoryCard.h" #include "Paths.h" diff --git a/pcsx2/Misc.cpp b/pcsx2/Misc.cpp index 59481b973f..0ea9240e02 100644 --- a/pcsx2/Misc.cpp +++ b/pcsx2/Misc.cpp @@ -26,8 +26,7 @@ #include -#include "Common.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "HostGui.h" #include "CDVDisodrv.h" @@ -52,6 +51,11 @@ char CdromId[12]; static int g_Pcsx2Recording = 0; // true 1 if recording video and sound bool renderswitch = 0; +#define NUM_STATES 10 +int StatesC = 0; +extern char strgametitle[256]; + + const char *LabelAuthors = { "PCSX2, a PS2 emulator\n\n" "Active Devs: Arcum42, Refraction,\n" @@ -80,73 +84,22 @@ const char *LabelGreets = { "F|RES, MrBrown, razorblade, Seta-san, Skarmeth" }; -static struct { - const char *name; - u32 size; -} ioprps[]={ - {"IOPRP14", 43845}, - {"IOPRP142", 48109}, - {"IOPRP143", 58317}, - {"IOPRP144", 58525}, - {"IOPRP15", 82741}, - {"IOPRP151", 82917}, - {"IOPRP153", 82949}, - {"IOPRP16", 91909}, - {"IOPRP165", 98901}, - {"IOPRP20", 109809}, - {"IOPRP202", 110993}, - {"IOPRP205", 119797}, - {"IOPRP21", 126857}, - {"IOPRP211", 129577}, - {"IOPRP213", 129577}, - {"IOPRP214", 140945}, - {"IOPRP22", 199257}, - {"IOPRP221", 196937}, - {"IOPRP222", 198233}, - {"IOPRP224", 201065}, - {"IOPRP23", 230329}, - {"IOPRP234", 247641}, - {"IOPRP24", 251065}, - {"IOPRP241", 251049}, - {"IOPRP242", 252409}, - {"IOPRP243", 253201}, - {"IOPRP250", 264897}, - {"IOPRP252", 265233}, - {"IOPRP253", 267217}, - {"IOPRP254", 264449}, - {"IOPRP255", 264449}, - {"IOPRP260", 248945}, - {"IOPRP270", 249121}, - {"IOPRP271", 266817}, - {"IOPRP280", 269889}, - {"IOPRP300", 275345}, - {"DNAS280", 272753}, - {"DNAS270", 251729}, - {"DNAS271", 268977}, - {"DNAS300", 278641}, - {"DNAS280", 272705}, - {"DNAS255", 264945}, - {NULL, 0} -}; +#define DIRENTRY_SIZE 16 -void GetRPCVersion(char *ioprp, char *rpcver){ - char *p=ioprp; - int i; - struct TocEntry te; - - if (p && (CDVD_findfile(p+strlen("cdromN:"), &te) != -1)){ - for (i=0; ioprps[i].size>0; i++) - if (te.fileSize==ioprps[i].size) - break; - if (ioprps[i].size>0) - p=(char *)ioprps[i].name; - } - // fixme - Is p really supposed to be set in the middle of an if statement? - if (p && (p=strstr(p, "IOPRP")+strlen("IOPRP"))){ - for (i=0;(i<4) && p && (*p>='0') && (*p<='9');i++, p++) rpcver[i]=*p; - for ( ; i<4 ;i++ ) rpcver[i]='0'; - } -} +#if defined(_MSC_VER) +#pragma pack(1) +#endif + +struct romdir{ + char fileName[10]; + u16 extInfoSize; + u32 fileSize; +#if defined(_MSC_VER) +}; +#pragma pack() //+22 +#else +} __attribute__((packed)); +#endif u32 GetBiosVersion() { unsigned int fileOffset=0; @@ -267,44 +220,6 @@ int IsBIOS(char *filename, char *description) return FALSE; //fail quietly } -// LOAD STUFF - -// fixme - Is there any reason why we shouldn't delete this define, and replace the array lengths -// with the actual numbers? -#define ISODCL(from, to) (to - from + 1) - -struct iso_directory_record { - char length [ISODCL (1, 1)]; /* length[1]; 711 */ - char ext_attr_length [ISODCL (2, 2)]; /* ext_attr_length[1]; 711 */ - char extent [ISODCL (3, 10)]; /* extent[8]; 733 */ - char size [ISODCL (11, 18)]; /* size[8]; 733 */ - char date [ISODCL (19, 25)]; /* date[7]; 7 by 711 */ - char flags [ISODCL (26, 26)]; /* flags[1]; */ - char file_unit_size [ISODCL (27, 27)]; /* file_unit_size[1]; 711 */ - char interleave [ISODCL (28, 28)]; /* interleave[1]; 711 */ - char volume_sequence_number [ISODCL (29, 32)]; /* volume_sequence_number[3]; 723 */ - unsigned char name_len [ISODCL (33, 33)]; /* name_len[1]; 711 */ - char name [1]; -}; - -int LoadCdrom() { - return 0; -} - -int CheckCdrom() { - u8 *buf; - - if (CDVDreadTrack(16, CDVD_MODE_2352) == -1) - return -1; - buf = CDVDgetBuffer(); - if (buf == NULL) - return -1; - - strncpy(CdromId, (char*)buf+52, 10); - - return 0; -} - int GetPS2ElfName(char *name){ int f; char buffer[g_MaxPath];//if a file is longer...it should be shorter :D @@ -351,7 +266,6 @@ int GetPS2ElfName(char *name){ FILE *fp; int i; - // inifile_read(CdromId); fp = fopen("System.map", "r"); if( fp == NULL ) return 2; @@ -396,7 +310,6 @@ void SaveGSState(const string& file) g_fGSSave->Freeze( g_nLeftGSFrames ); } -extern uptr pDsp; void LoadGSState(const string& file) { int ret; @@ -448,11 +361,6 @@ void LoadGSState(const string& file) #endif -#define NUM_STATES 10 -int StatesC = 0; - -extern char strgametitle[256]; - char* mystrlwr( char* string ) { assert( string != NULL ); @@ -529,10 +437,10 @@ void CycleFrameLimit(int dir) void ProcessFKeys(int fkey, int shift) { - assert(fkey >= 1 && fkey <= 12 ); + assert(fkey >= 1 && fkey <= 12 ); - switch(fkey) { - case 1: + switch(fkey) { + case 1: try { gzSavingState( SaveState::GetFilename( StatesC ) ).FreezeAll(); @@ -653,93 +561,26 @@ void ProcessFKeys(int fkey, int shift) #endif case 12: - if( shift ) { + if( shift ) { #ifdef PCSX2_DEVBUILD - iDumpRegisters(cpuRegs.pc, 0); + iDumpRegisters(cpuRegs.pc, 0); Console::Notice("hardware registers dumped EE:%x, IOP:%x\n", params cpuRegs.pc, psxRegs.pc); #endif - } - else { - g_Pcsx2Recording ^= 1; - if( mtgsThread != NULL ) { + } + else { + g_Pcsx2Recording ^= 1; + if( mtgsThread != NULL ) { mtgsThread->SendSimplePacket(GS_RINGTYPE_RECORD, g_Pcsx2Recording, 0, 0); - } - else { - if( GSsetupRecording != NULL ) GSsetupRecording(g_Pcsx2Recording, NULL); - } + } + else { + if( GSsetupRecording != NULL ) GSsetupRecording(g_Pcsx2Recording, NULL); + } if( SPU2setupRecording != NULL ) SPU2setupRecording(g_Pcsx2Recording, NULL); - } + } break; - } -} - -void injectIRX(const char *filename) -{ - char name[260], *p, *q; - struct romdir *rd; - int iROMDIR=-1, iIOPBTCONF=-1, iBLANK=-1, i, filesize; - FILE *fp; - - strcpy(name, filename); - for (i=0; name[i] && name[i]!='.' && i<10; i++) name[i]=toupper(name[i]);name[i]=0; - - //phase 1: find ROMDIR in bios - for (p=(char*)PS2MEM_ROM; p<(char*)PS2MEM_ROM+0x80000; p++) - if (strncmp(p, "RESET", 5)==0) - break; - rd=(struct romdir*)p; - - for (i=0; rd[i].fileName[0]; i++)if (strncmp(rd[i].fileName, name, strlen(name))==0)break; - if (rd[i].fileName[0])return;//already in;) - - //phase 2: make room in IOPBTCONF & ROMDIR - for (i=0; rd[i].fileName[0]; i++)if (strncmp(rd[i].fileName, "ROMDIR", 6)==0)iROMDIR=i; - for (i=0; rd[i].fileName[0]; i++)if (strncmp(rd[i].fileName, "IOPBTCONF", 9)==0)iIOPBTCONF=i; - - for (i=0; rd[i].fileName[0]; i++)if (rd[i].fileName[0]=='-')break; iBLANK=i; - rd[iBLANK].fileSize-=DIRENTRY_SIZE+DIRENTRY_SIZE; - p=(char*)PS2MEM_ROM;for (i=0; iq){*((u64*)p)=*((u64*)p-4);*((u64*)p+1)=*((u64*)p-3);p-=DIRENTRY_SIZE;} - *((u64*)p)=*((u64*)p+1)=0;p-=DIRENTRY_SIZE;rd[iIOPBTCONF].fileSize+=DIRENTRY_SIZE; - - q=(char*)PS2MEM_ROM;for (i=0; i<=iROMDIR; i++) q+=(rd[i].fileSize+0xF)&(~0xF); - while (p >q){*((u64*)p)=*((u64*)p-2);*((u64*)p+1)=*((u64*)p-1);p-=DIRENTRY_SIZE;} - *((u64*)p)=*((u64*)p+1)=0;p-=DIRENTRY_SIZE;rd[iROMDIR].fileSize+=DIRENTRY_SIZE; - - //phase 3: add the name to the end of IOPBTCONF - p=(char*)PS2MEM_ROM;for (i=0; i= 0x1200 && psxRegs.pc <= 0x1400) || (psxRegs.pc >= 0x0b40 && psxRegs.pc <= 0x1000)) + PSXCPU_LOG("%s\n", disR3000AF(psxRegs.code, psxRegs.pc)); psxRegs.pc+= 4; psxRegs.cycle++; @@ -395,6 +395,7 @@ static void doBranch(s32 tar) { branch2 = iopIsDelaySlot = true; branchPC = tar; execI(); + PSXCPU_LOG( "\n" ); iopIsDelaySlot = false; psxRegs.pc = branchPC; diff --git a/pcsx2/R3000AOpcodeTables.cpp b/pcsx2/R3000AOpcodeTables.cpp index ccd5c3c00c..98ba9a92de 100644 --- a/pcsx2/R3000AOpcodeTables.cpp +++ b/pcsx2/R3000AOpcodeTables.cpp @@ -18,8 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" -#include "Common.h" +#include "IopCommon.h" extern void zeroEx(); @@ -58,7 +57,7 @@ void psxANDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs void psxORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im void psxXORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im void psxSLTI() { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed) -void psxSLTIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < _ImmU_; } // Rt = Rs < Im (Unsigned) +void psxSLTIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < (u32)_Imm_; } // Rt = Rs < Im (Unsigned) /********************************************************* * Register arithmetic * diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index fffa793c05..78ad012144 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -387,19 +387,12 @@ static __forceinline void _cpuTestTIMR() static __forceinline void _cpuTestPERF() { - // fixme - The interpreter and recompiler both re-calculate these values - // whenever they are read, so updating them at regular intervals *should be* - // merely a common courtesy. But when I set them up to be called less - // frequently some games would crash. I'd like to figure out why someday. [Air] + // Perfs are updated when read by games (COP0's MFC0/MTC0 instructions), so we need + // only update them at semi-regular intervals to keep cpuRegs.cycle from wrapping + // around twice on us btween updates. Hence this function is called from the cpu's + // Counters update. - if((cpuRegs.PERF.n.pccr & 0x800003E0) == 0x80000020) { - cpuRegs.PERF.n.pcr0 += cpuRegs.cycle-s_iLastPERFCycle[0]; - s_iLastPERFCycle[0] = cpuRegs.cycle; - } - if((cpuRegs.PERF.n.pccr & 0x800F8000) == 0x80008000) { - cpuRegs.PERF.n.pcr1 += cpuRegs.cycle-s_iLastPERFCycle[1]; - s_iLastPERFCycle[1] = cpuRegs.cycle; - } + COP0_UpdatePCCR(); } // Checks the COP0.Status for exception enablings. diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index 0117b1d8af..900142b2ea 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -52,8 +52,35 @@ union GPRregs { }; union PERFregs { - struct { - u32 pccr, pcr0, pcr1, pad; + struct + { + union + { + struct + { + u32 pad0:1; // LSB should always be zero (or undefined) + u32 EXL0:1; // enable PCR0 during Level 1 exception handling + u32 K0:1; // enable PCR0 during Kernel Mode execution + u32 S0:1; // enable PCR0 during Supervisor mode execution + u32 U0:1; // enable PCR0 during User-mode execution + u32 Event0:5; // PCR0 event counter (all values except 1 ignored at this time) + + u32 pad1:1; // more zero/undefined padding [bit 10] + + u32 EXL1:1; // enable PCR1 during Level 1 exception handling + u32 K1:1; // enable PCR1 during Kernel Mode execution + u32 S1:1; // enable PCR1 during Supervisor mode execution + u32 U1:1; // enable PCR1 during User-mode execution + u32 Event1:5; // PCR1 event counter (all values except 1 ignored at this time) + + u32 Reserved:11; + u32 CTE:1; // Counter enable bit, no counting if set to zero. + } b; + + u32 val; + } pccr; + + u32 pcr0, pcr1, pad; } n; u32 r[4]; }; diff --git a/pcsx2/RDebug/deci2_dbgp.cpp b/pcsx2/RDebug/deci2_dbgp.cpp index 7a38786ee4..09ec2f44df 100644 --- a/pcsx2/RDebug/deci2_dbgp.cpp +++ b/pcsx2/RDebug/deci2_dbgp.cpp @@ -18,8 +18,7 @@ #include "PrecompiledHeader.h" -#include "Common.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "VUmicro.h" #include "deci2.h" diff --git a/pcsx2/RDebug/deci2_iloadp.cpp b/pcsx2/RDebug/deci2_iloadp.cpp index dcf733dc2a..86660bf28f 100644 --- a/pcsx2/RDebug/deci2_iloadp.cpp +++ b/pcsx2/RDebug/deci2_iloadp.cpp @@ -18,8 +18,7 @@ #include "PrecompiledHeader.h" -#include "Common.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "IopBios2.h" #include "deci2.h" diff --git a/pcsx2/SaveState.cpp b/pcsx2/SaveState.cpp index 256af3a99e..ccca61f110 100644 --- a/pcsx2/SaveState.cpp +++ b/pcsx2/SaveState.cpp @@ -18,8 +18,7 @@ #include "PrecompiledHeader.h" -#include "Common.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "SaveState.h" #include "CDVDisodrv.h" diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index 0a4bc57927..d7fe5fb242 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -23,7 +23,7 @@ #ifdef __LINUX__ #include "PS2Edefs.h" #endif - +#include "System.h" // Savestate Versioning! // If you make changes to the savestate version, please increment the value below. diff --git a/pcsx2/Saveslots.cpp b/pcsx2/Saveslots.cpp index 8681eb2f84..e39dbf9720 100644 --- a/pcsx2/Saveslots.cpp +++ b/pcsx2/Saveslots.cpp @@ -22,8 +22,7 @@ #include "GS.h" -TESTRUNARGS g_TestRun; - +StartupParams g_Startup; ////////////////////////////////////////////////////////////////////////////////////////// // Save Slot Detection System @@ -63,11 +62,11 @@ void States_Load( const string& file ) try { _loadStateOrExcept( file ); - HostGui::Notice( fmt_string( "*PCSX2*: Loaded State %s", file) ); + HostGui::Notice( fmt_string( "*PCSX2*: Loaded State %hs", &file) ); } catch( Exception::StateLoadError_Recoverable& ex) { - Console::Notice( "Could not load savestate file: %s.\n\n%s", params file, ex.cMessage() ); + Console::Notice( "Could not load savestate file: %hs.\n\n%s", params &file, ex.cMessage() ); // At this point the cpu hasn't been reset, so we can return // control to the user safely... (that's why we use a console notice instead of a popup) @@ -79,7 +78,7 @@ void States_Load( const string& file ) // The emulation state is ruined. Might as well give them a popup and start the gui. string message( fmt_string( - "Encountered an error while loading savestate from file: %s.\n", file ) ); + "Encountered an error while loading savestate from file: %hs.\n", &file ) ); if( g_EmulationInProgress ) message += "Since the savestate load was incomplete, the emulator must reset.\n"; @@ -145,12 +144,12 @@ void States_Save( const string& file ) try { StateRecovery::SaveToFile( file ); - HostGui::Notice( fmt_string( "State saved to file: %s", file ) ); + HostGui::Notice( fmt_string( "State saved to file: %hs", &file ) ); } catch( Exception::BaseException& ex ) { Console::Error( (fmt_string( - "An error occurred while trying to save to file %s\n", file ) + + "An error occurred while trying to save to file %hs\n", &file ) + "Your emulation state has not been saved!\n\nError: " + ex.Message()).c_str() ); } @@ -180,7 +179,7 @@ void States_Save(int num) // void vSyncDebugStuff( uint frame ) { -#ifdef PCSX2_DEVBUILD +#ifdef OLD_TESTBUILD_STUFF if( g_TestRun.enabled && g_TestRun.frame > 0 ) { if( frame > g_TestRun.frame ) { // take a snapshot diff --git a/pcsx2/Sif.cpp b/pcsx2/Sif.cpp index 5996e030a9..06b11b45b3 100644 --- a/pcsx2/Sif.cpp +++ b/pcsx2/Sif.cpp @@ -20,8 +20,7 @@ #define _PC_ // disables MIPS opcode macros. -#include "PsxCommon.h" -#include "Common.h" +#include "IopCommon.h" #include "Sifcmd.h" using namespace std; diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index ed50adf161..894ad7e180 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "MemoryCard.h" _sio sio; diff --git a/pcsx2/SourceLog.cpp b/pcsx2/SourceLog.cpp index 08351bb72d..84579c9a0e 100644 --- a/pcsx2/SourceLog.cpp +++ b/pcsx2/SourceLog.cpp @@ -37,7 +37,8 @@ using namespace R5900; FILE *emuLog; #ifdef PCSX2_DEVBUILD -u32 varLog; + +LogSources varLog; // these used by the depreciated _old_Log only u16 logProtocol; @@ -67,7 +68,7 @@ void __Log( const char* fmt, ... ) // fixme: should throw an exception here once we have proper exception handling implemented. } - if (varLog & 0x80000000) // log to console enabled? + if (varLog.LogToConsole) // log to console enabled? { Console::Write(tmp); @@ -105,7 +106,7 @@ static __forceinline void _vSourceLog( u16 protocol, u8 source, u32 cpuPc, u32 c #endif #endif - if (varLog & 0x80000000) // log to console enabled? + if (varLog.LogToConsole) // log to console enabled? { Console::WriteLn(tmp); @@ -145,7 +146,6 @@ IMPLEMENT_SOURCE_LOG( BIOS, 'E', 0 ) IMPLEMENT_SOURCE_LOG( CPU, 'E', 1 ) IMPLEMENT_SOURCE_LOG( FPU, 'E', 1 ) -IMPLEMENT_SOURCE_LOG( MMI, 'E', 1 ) IMPLEMENT_SOURCE_LOG( COP0, 'E', 1 ) IMPLEMENT_SOURCE_LOG( MEM, 'E', 6 ) diff --git a/pcsx2/Stats.cpp b/pcsx2/Stats.cpp index 94f7c9d07c..b515c2f7fb 100644 --- a/pcsx2/Stats.cpp +++ b/pcsx2/Stats.cpp @@ -20,8 +20,7 @@ #include -#include "Common.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "Stats.h" #include "Paths.h" diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index c4911fdf0c..b124da39ce 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -391,7 +391,7 @@ void SysPrepareExecution( const char* elf_file, bool use_bios ) return; } - if (OpenPlugins(g_TestRun.ptitle) == -1) + if (OpenPlugins(NULL) == -1) return; if( elf_file == NULL ) @@ -401,9 +401,6 @@ void SysPrepareExecution( const char* elf_file, bool use_bios ) // Not recovering a state, so need to execute the bios and load the ELF information. // (note: gsRecoveries are done from ExecuteCpu) - // if the elf_file is null we use the CDVD elf file. - // But if the elf_file is an empty string then we boot the bios instead. - char ename[g_MaxPath]; ename[0] = 0; if( !use_bios ) diff --git a/pcsx2/System.h b/pcsx2/System.h index 59e4690c8e..5d64ad0040 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -20,10 +20,12 @@ #define __SYSTEM_H__ #include "PS2Etypes.h" +#include "Pcsx2Config.h" #include "Exceptions.h" #include "Paths.h" #include "MemcpyFast.h" #include "SafeArray.h" +#include "Misc.h" enum PageProtectionMode diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 06079f6a98..4bf5ff8b2a 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -1534,11 +1534,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data){ } else { - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); GSGIFTRANSFER2((u32*)splittransfer[0], 1); - FreezeMMXRegs(0); - FreezeXMMRegs(0); + FreezeRegs(0); } if(vif1.tag.size == 0) vif1.cmd = 0; @@ -1576,12 +1574,9 @@ static int __fastcall Vif1TransDirectHL(u32 *data){ mtgsThread->SendDataPacket(); } else { - - FreezeXMMRegs(1); - FreezeMMXRegs(1); + FreezeRegs(1); GSGIFTRANSFER2(data, (ret >> 2)); - FreezeMMXRegs(0); - FreezeXMMRegs(0); + FreezeRegs(0); } return ret; @@ -2128,9 +2123,7 @@ void dmaVIF1() vif1ch->madr += vif1ch->qwc * 16; // mgs3 scene changes vif1ch->qwc = 0; CPU_INT(1, g_vifCycles); - } - vif1.done = 1; return; } diff --git a/pcsx2/configure.ac b/pcsx2/configure.ac index 24f1f50881..4f3d12e5e1 100644 --- a/pcsx2/configure.ac +++ b/pcsx2/configure.ac @@ -35,12 +35,13 @@ else DEBUG_FLAGS=" -O0 -g " fi -WARNING_FLAGS="-Wall -Wno-format -Wno-unused-value" -NORMAL_FLAGS=" -pipe -msse -msse2 -O2 " +WARNING_FLAGS="-Wno-format -Wno-unused-value" +EXTRA_WARNING_FLAGS="-Wall -Wextra" +NORMAL_FLAGS=" -pipe -msse -msse2 -O2 ${WARNING_FLAGS}" # These optimizations seem to cause issues with GCC 4.3.3, so we'll turn them off. NORMAL_FLAGS+=" -fno-guess-branch-probability -fno-dse -fno-tree-dse " -DEBUG_FLAGS+=" -g -msse -msse2 ${WARNING_FLAGS} " +DEBUG_FLAGS+=" -g -msse -msse2 ${EXTRA_WARNING_FLAGS} ${WARNING_FLAGS} " dnl Check for debug build AC_MSG_CHECKING(debug build) @@ -63,14 +64,14 @@ AC_MSG_RESULT($debug) AC_CHECK_FUNCS([ _aligned_malloc _aligned_free ], AC_DEFINE(HAVE_ALIGNED_MALLOC)) -AC_MSG_CHECKING(turn on memcpy_fast_) -AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]), - memcpyfast=$enableval,memcpyfast=no) -if test "x$memcpyfast" == xyes -then - AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY]) -fi -AC_MSG_RESULT($memcpyfast) +#AC_MSG_CHECKING(turn on memcpy_fast_) +#AC_ARG_ENABLE(memcpyfast, AC_HELP_STRING([--enable-memcpyfast], [Turns on memcpy_fast - EXPERIMENTAL]), +#memcpyfast=$enableval,memcpyfast=no) +#if test "x$memcpyfast" == xyes +#then +# AC_DEFINE(LINUX_USE_FAST_MEMORY,1,[LINUX_USE_FAST_MEMORY]) +#fi +#AC_MSG_RESULT($memcpyfast) #AC_MSG_CHECKING(turn on microVU) #AC_ARG_ENABLE(microVU, AC_HELP_STRING([--enable-microVU], [Turns on the currently incomplete microVU files - Not a good idea]), diff --git a/pcsx2/windows/Debugger.cpp b/pcsx2/windows/Debugger.cpp index c1e329e8f9..ccb47a1171 100644 --- a/pcsx2/windows/Debugger.cpp +++ b/pcsx2/windows/Debugger.cpp @@ -552,10 +552,7 @@ BOOL APIENTRY DebuggerProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam #ifdef PCSX2_DEVBUILD case IDC_DEBUG_LOG: - if( varLog ) - varLog &= ~0x80000000; - else - varLog |= 0x80000000; + varLog.LogToConsole = !varLog.LogToConsole; return TRUE; #endif diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 8e16330876..e2f88e6db5 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2925,6 +2925,10 @@ RelativePath="..\..\Common.h" > + + diff --git a/pcsx2/windows/Win32.h b/pcsx2/windows/Win32.h index 737fe1b149..252475101e 100644 --- a/pcsx2/windows/Win32.h +++ b/pcsx2/windows/Win32.h @@ -27,7 +27,7 @@ #include #include -#include "Misc.h" +#include "System.h" #include "HostGui.h" #include "resource.h" #include "WinDebugResource.h" @@ -149,7 +149,6 @@ extern AppData gApp; extern HWND hStatusWnd; extern PcsxConfig winConfig; // local storage of the configuration options. -extern bool UseGui; extern bool nDisableSC; // screensaver extern unsigned int langsMax; diff --git a/pcsx2/windows/WinMain.cpp b/pcsx2/windows/WinMain.cpp index 072ee20a8a..9e250994e6 100644 --- a/pcsx2/windows/WinMain.cpp +++ b/pcsx2/windows/WinMain.cpp @@ -55,36 +55,26 @@ void strcatz(char *dst, char *src) strcpy(dst + len, src); } + //2002-09-20 (Florin) BOOL APIENTRY CmdlineProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam);//forward def //------------------- static const char* phelpmsg = - "pcsx2 [options] [file]\n\n" - "-cfg [file] {configuration file}\n" - "-efile [efile] {0 - reset, 1 - runcd (default), 2 - loadelf}\n" - "-help {display this help file}\n" - "-nogui {Don't use gui when launching}\n" - "-loadgs [file] {Loads a gsstate}\n" + "pcsx2 [options] [cdimage/elf file]\n\n" + "\t-cfg [file] {configuration file}\n" + "\t-bootmode [mode] {0 - quick (default), 1 - bios, 2 - load elf}\n" + "\t-nogui {disables display of the gui - skips right to opening the GS window}" + "\t-help {display this help file}\n" + "\t-loadgs [file] {Loads a gsstate}\n\n" + "Run without GUI Options:\n" "\n" -#ifdef PCSX2_DEVBUILD - "Testing Options: \n" - "\t-frame [frame] {game will run up to this frame before exiting}\n" - "\t-image [name] {path and base name of image (do not include the .ext)}\n" - "\t-jpg {save images to jpg format}\n" - "\t-log [name] {log path to save log file in}\n" - "\t-logopt [hex] {log options in hex (see debug.h) }\n" - "\t-numimages [num] {after hitting frame, this many images will be captures every 20 frames}\n" - "\t-test {Triggers testing mode (only for dev builds)}\n" - "\n" -#endif - "Plugin Overrides (specified dlls will be used in place of configured dlls):\n" "\t-cdvd [dllpath] {specifies an override for the CDVD plugin}\n" "\t-gs [dllpath] {specifies an override for the GS plugin}\n" "\t-spu [dllpath] {specifies an override for the SPU2 plugin}\n" - "\t-pads [dllpath] {specifies an override for *both* pad plugins}\n" + "\t-pad [dllpath] {specifies an override for *both* pad plugins}\n" "\t-pad1 [dllpath] {specifies an override for the PAD1 plugin only}\n" "\t-pad2 [dllpath] {specifies an override for the PAD2 plugin only}\n" "\t-dev9 [dllpath] {specifies an override for the DEV9 plugin}\n" @@ -189,21 +179,6 @@ void WinClose() BOOL SysLoggedSetLockPagesPrivilege ( HANDLE hProcess, BOOL bEnable); -// Returns TRUE if the test run mode was activated (game was run and has been exited) -static bool TestRunMode() -{ - if( IsDevBuild && (g_TestRun.enabled || g_TestRun.ptitle != NULL) ) - { - // run without ui - UseGui = false; - PCSX2_MEM_PROTECT_BEGIN(); - SysPrepareExecution( g_TestRun.efile ? g_TestRun.ptitle : NULL ); - PCSX2_MEM_PROTECT_END(); - return true; - } - return false; -} - static void _doPluginOverride( const char* name, const char* src, char (&dest)[g_MaxPath] ) { if( src == NULL || src[0] == 0 ) return; @@ -219,12 +194,12 @@ void WinRun() memcpy( &winConfig, &Config, sizeof( PcsxConfig ) ); - _doPluginOverride( "GS", g_TestRun.pgsdll, Config.GS ); - _doPluginOverride( "CDVD", g_TestRun.pcdvddll, Config.CDVD ); - _doPluginOverride( "SPU2", g_TestRun.pspudll, Config.SPU2 ); - _doPluginOverride( "PAD1", g_TestRun.ppad1dll, Config.PAD1 ); - _doPluginOverride( "PAD2", g_TestRun.ppad2dll, Config.PAD2 ); - _doPluginOverride( "DEV9", g_TestRun.pdev9dll, Config.DEV9 ); + _doPluginOverride( "GS", g_Startup.gsdll, Config.GS ); + _doPluginOverride( "CDVD", g_Startup.cdvddll, Config.CDVD ); + _doPluginOverride( "SPU2", g_Startup.spudll, Config.SPU2 ); + _doPluginOverride( "PAD1", g_Startup.pad1dll, Config.PAD1 ); + _doPluginOverride( "PAD2", g_Startup.pad2dll, Config.PAD2 ); + _doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 ); #ifndef _DEBUG @@ -239,8 +214,6 @@ void WinRun() if (Pcsx2Configure(NULL) == FALSE) return; } - if( TestRunMode() ) return; - #ifdef PCSX2_DEVBUILD if( g_pRunGSState ) { LoadGSState(g_pRunGSState); @@ -294,8 +267,6 @@ void WinRun() textdomain(PACKAGE); #endif - memzero_obj(g_TestRun); - _getcwd( g_WorkingFolder, g_MaxPath ); int argc; @@ -303,7 +274,7 @@ void WinRun() if( argv == NULL ) { - Msgbox::Alert( "A fatal error occured while attempting to parse the command line.\n" ); + Msgbox::Alert( "A fatal error occurred while attempting to parse the command line.\n" ); return 2; } @@ -396,6 +367,23 @@ void RunGui() LoadPatch( str_Default ); + if( g_Startup.NoGui || g_Startup.Enabled ) + { + // Initially bypass GUI and start PCSX2 directly. + // Manually load plugins using the user's configured image (if non-elf). + + if( g_Startup.Enabled && (g_Startup.BootMode != BootMode_Elf) ) + { + if (OpenPlugins(g_Startup.ImageName) == -1) + return; + } + + SysPrepareExecution( + (g_Startup.BootMode == BootMode_Elf) ? g_Startup.ImageName : NULL, + (g_Startup.BootMode == BootMode_Bios) + ); + } + do { CreateMainWindow(); @@ -501,22 +489,26 @@ BOOL APIENTRY CmdlineProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) BOOL APIENTRY LogProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { int i; + + // Note: varLog layout has been changed, so this code won't be applicable to the new wx version + // of this dialog box. + switch (message) { case WM_INITDIALOG: - for (i=0; i<32; i++) - if (varLog & (1<=MMX_FPU&&(x)= MMX_GPR && (x) < MMX_GPR+34) struct _mmxregs { diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index 641d601ebe..12b887fbe0 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -25,17 +25,19 @@ #include "iFPU.h" /* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */ + +/* Can be made faster by not converting stuff back and forth between instructions. */ //set overflow flag (set only if FPU_RESULT is 1) #define FPU_FLAGS_OVERFLOW 1 //set underflow flag (set only if FPU_RESULT is 1) #define FPU_FLAGS_UNDERFLOW 1 -//if 1, result is not clamped (MORE correct, +//if 1, result is not clamped (Gives correct results as in PS2, //but can cause problems due to insuffecient clamping levels in the VUs) #define FPU_RESULT 1 -//also impacts other aspects of DIV/R/SQRT correctness +//set I&D flags. also impacts other aspects of DIV/R/SQRT correctness #define FPU_FLAGS_ID 1 //------------------------------------------------------------------ @@ -126,270 +128,8 @@ static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xf //------------------------------------------------------------------ // *FPU Opcodes!* -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// CFC1 / CTC1 -//------------------------------------------------------------------ -void recCFC1(void) -{ - if ( !_Rt_ || ( (_Fs_ != 0) && (_Fs_ != 31) ) ) return; - - _eeOnWriteReg(_Rt_, 1); - - MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] ); - _deleteEEreg(_Rt_, 0); - - if (_Fs_ == 31) - { - AND32ItoR(EAX, 0x0083c078); //remove always-zero bits - OR32ItoR(EAX, 0x01000001); //set always-one bits - } - - if(EEINST_ISLIVE1(_Rt_)) - { - CDQ( ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); - } - else - { - EEINST_RESETHASLIVE1(_Rt_); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - } - -} - -void recCTC1( void ) -{ - if ( _Fs_ != 31 ) return; - - if ( GPR_IS_CONST1(_Rt_) ) - { - MOV32ItoM((uptr)&fpuRegs.fprc[ _Fs_ ], g_cpuConstRegs[_Rt_].UL[0]); - } - else - { - int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - - if( mmreg >= 0 ) - { - SSEX_MOVD_XMM_to_M32((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); - } - - else - { - mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ); - - if ( mmreg >= 0 ) - { - MOVDMMXtoM((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); - SetMMXstate(); - } - else - { - _deleteGPRtoXMMreg(_Rt_, 1); - _deleteMMXreg(MMX_GPR+_Rt_, 1); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - MOV32RtoM( (uptr)&fpuRegs.fprc[ _Fs_ ], EAX ); - } - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MFC1 -//------------------------------------------------------------------ - -void recMFC1(void) -{ - int regt, regs; - if ( ! _Rt_ ) return; - - _eeOnWriteReg(_Rt_, 1); - - regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - - if( regs >= 0 ) - { - _deleteGPRtoXMMreg(_Rt_, 2); - regt = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_WRITE); - - if( regt >= 0 ) - { - SSE2_MOVDQ2Q_XMM_to_MM(regt, regs); - - if(EEINST_ISLIVE1(_Rt_)) - _signExtendGPRtoMMX(regt, _Rt_, 0); - else - EEINST_RESETHASLIVE1(_Rt_); - } - else - { - if(EEINST_ISLIVE1(_Rt_)) - { - _signExtendXMMtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs, 0); - } - else - { - EEINST_RESETHASLIVE1(_Rt_); - SSE_MOVSS_XMM_to_M32((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs); - } - } - } - else - { - regs = _checkMMXreg(MMX_FPU+_Fs_, MODE_READ); - - if( regs >= 0 ) - { - // convert to mmx reg - mmxregs[regs].reg = MMX_GPR+_Rt_; - mmxregs[regs].mode |= MODE_READ|MODE_WRITE; - _signExtendGPRtoMMX(regs, _Rt_, 0); - } - else - { - regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - - if( regt >= 0 ) - { - if( xmmregs[regt].mode & MODE_WRITE ) - { - SSE_MOVHPS_XMM_to_M64((uptr)&cpuRegs.GPR.r[_Rt_].UL[2], regt); - } - xmmregs[regt].inuse = 0; - } - - _deleteEEreg(_Rt_, 0); - MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); - - if(EEINST_ISLIVE1(_Rt_)) - { - CDQ( ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); - } - else - { - EEINST_RESETHASLIVE1(_Rt_); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - } - } - } -} - -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MTC1 -//------------------------------------------------------------------ -void recMTC1(void) -{ - if( GPR_IS_CONST1(_Rt_) ) - { - _deleteFPtoXMMreg(_Fs_, 0); - MOV32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, g_cpuConstRegs[_Rt_].UL[0]); - } - else - { - int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - - if( mmreg >= 0 ) - { - if( g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE ) - { - // transfer the reg directly - _deleteGPRtoXMMreg(_Rt_, 2); - _deleteFPtoXMMreg(_Fs_, 2); - _allocFPtoXMMreg(mmreg, _Fs_, MODE_WRITE); - } - else - { - int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - - if( mmreg2 >= 0 ) - SSE_MOVSS_XMM_to_XMM(mmreg2, mmreg); - else - SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); - } - } - else - { - int mmreg2; - - mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ); - mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - - if( mmreg >= 0 ) - { - if( mmreg2 >= 0 ) - { - SetMMXstate(); - SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg); - } - else - { - SetMMXstate(); - MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); - } - } - else - { - if( mmreg2 >= 0 ) - { - SSE_MOVSS_M32_to_XMM(mmreg2, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); - } - else - { - MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); - MOV32RtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, EAX); - } - } - } - } -} -//------------------------------------------------------------------ - - -/*#ifndef FPU_RECOMPILE // If FPU_RECOMPILE is not defined, then use the interpreter opcodes. (CFC1, CTC1, MFC1, and MTC1 are special because they work specifically with the EE rec so they're defined above) - -REC_FPUFUNC(ABS_S); -REC_FPUFUNC(ADD_S); -REC_FPUFUNC(ADDA_S); -REC_FPUBRANCH(BC1F); -REC_FPUBRANCH(BC1T); -REC_FPUBRANCH(BC1FL); -REC_FPUBRANCH(BC1TL); -REC_FPUFUNC(C_EQ); -REC_FPUFUNC(C_F); -REC_FPUFUNC(C_LE); -REC_FPUFUNC(C_LT); -REC_FPUFUNC(CVT_S); -REC_FPUFUNC(CVT_W); -REC_FPUFUNC(DIV_S); -REC_FPUFUNC(MAX_S); -REC_FPUFUNC(MIN_S); -REC_FPUFUNC(MADD_S); -REC_FPUFUNC(MADDA_S); -REC_FPUFUNC(MOV_S); -REC_FPUFUNC(MSUB_S); -REC_FPUFUNC(MSUBA_S); -REC_FPUFUNC(MUL_S); -REC_FPUFUNC(MULA_S); -REC_FPUFUNC(NEG_S); -REC_FPUFUNC(SUB_S); -REC_FPUFUNC(SUBA_S); -REC_FPUFUNC(SQRT_S); -REC_FPUFUNC(RSQRT_S); - -#else // FPU_RECOMPILE*/ - +//------------------------------------------------------------------ + //------------------------------------------------------------------ // PS2 -> DOUBLE //------------------------------------------------------------------ @@ -678,7 +418,7 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc) // CommutativeOp XMM (used for ADD, MUL, MAX, MIN and SUB opcodes) //------------------------------------------------------------------ static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = { - SSE2_ADDSD_XMM_to_XMM, NULL, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM }; + SSE2_ADDSD_XMM_to_XMM, NULL, NULL, NULL, SSE2_SUBSD_XMM_to_XMM }; void recFPUOp(int info, int regd, int op, bool acc) { @@ -718,48 +458,6 @@ void recADDA_S_xmm(int info) FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); //------------------------------------------------------------------ -//------------------------------------------------------------------ -// BC1x XMM -//------------------------------------------------------------------ - /* -static void _setupBranchTest() -{ - _eeFlushAllUnused(); - - // COP1 branch conditionals are based on the following equation: - // (fpuRegs.fprc[31] & 0x00800000) - // BC2F checks if the statement is false, BC2T checks if the statement is true. - - MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]); - TEST32ItoR(EAX, FPUflagC); -} - -void recBC1F( void ) -{ - _setupBranchTest(); - recDoBranchImm(JNZ32(0)); -} - -void recBC1T( void ) -{ - _setupBranchTest(); - recDoBranchImm(JZ32(0)); -} - -void recBC1FL( void ) -{ - _setupBranchTest(); - recDoBranchImm_Likely(JNZ32(0)); -} - -void recBC1TL( void ) -{ - _setupBranchTest(); - recDoBranchImm_Likely(JZ32(0)); -}*/ -//------------------------------------------------------------------ - -//TOKNOW : how does C.??.S behave with denormals? void recCMP(int info) { int sreg, treg; @@ -787,12 +485,7 @@ void recC_EQ_xmm(int info) } FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); - -/*void recC_F() -{ - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); -}*/ - + void recC_LE_xmm(int info ) { recCMP(info); @@ -806,7 +499,6 @@ void recC_LE_xmm(int info ) } FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); -//REC_FPUFUNC(C_LE); void recC_LT_xmm(int info) { @@ -821,7 +513,6 @@ void recC_LT_xmm(int info) } FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); -//REC_FPUFUNC(C_LT); //------------------------------------------------------------------ @@ -840,7 +531,7 @@ void recCVT_S_xmm(int info) FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); -void recCVT_W() +void recCVT_W() //called from iFPU.cpp's recCVT_W { int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); @@ -1052,17 +743,42 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X // MAX / MIN XMM //------------------------------------------------------------------ -//TOKNOW : handles denormals like VU, maybe? +static const u32 PCSX2_ALIGNED16(minmax_mask[4]) = {0xffffffff, 0x80000000, 0, 0}; +static const u32 PCSX2_ALIGNED16(minmax_mask2[4]) = {0, 0x40000000, 0, 0}; +// FPU's MAX/MIN work with all numbers (including "denormals"). Check VU's logical min max for more info. +void recMINMAX(int info, bool ismin) +{ + int sreg, treg; + ALLOC_S(sreg); ALLOC_T(treg); + + CLEAR_OU_FLAGS; + + SSE2_PSHUFD_XMM_to_XMM(sreg, sreg, 0x00); + SSE2_PAND_M128_to_XMM(sreg, (uptr)minmax_mask); + SSE2_POR_M128_to_XMM(sreg, (uptr)minmax_mask2); + SSE2_PSHUFD_XMM_to_XMM(treg, treg, 0x00); + SSE2_PAND_M128_to_XMM(treg, (uptr)minmax_mask); + SSE2_POR_M128_to_XMM(treg, (uptr)minmax_mask2); + if (ismin) + SSE2_MINSD_XMM_to_XMM(sreg, treg); + else + SSE2_MAXSD_XMM_to_XMM(sreg, treg); + + SSE_MOVSS_XMM_to_XMM(EEREC_D, sreg); + + _freeXMMreg(sreg); _freeXMMreg(treg); +} + void recMAX_S_xmm(int info) { - recFPUOp(info, EEREC_D, 2, false); + recMINMAX(info, false); } FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); void recMIN_S_xmm(int info) { - recFPUOp(info, EEREC_D, 3, false); + recMINMAX(info, true); } FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); @@ -1322,6 +1038,5 @@ void recRSQRT_S_xmm(int info) FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); -//#endif // FPU_RECOMPILE -} } } } } \ No newline at end of file +} } } } } diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 077a119d40..2ffd8f789e 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -1023,99 +1023,46 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT, -1); - int t3reg = _allocTempXMMreg(XMMT_INT, -1); - if ( cpucaps.hasStreamingSIMD4Extensions ) { - SSE4_PMOVSXDQ_XMM_to_XMM(t0reg, EEREC_S); - SSE4_PMOVSXDQ_XMM_to_XMM(t1reg, EEREC_T); - SSE2_PADDQ_XMM_to_XMM(t0reg, t1reg); - SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, 0x0e); - SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, 0x0e); - SSE4_PMOVSXDQ_XMM_to_XMM(t1reg, t1reg); - SSE4_PMOVSXDQ_XMM_to_XMM(t2reg, t2reg); - } - else { - SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); - SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); - SSE2_PXOR_XMM_to_XMM(t2reg, t2reg); - SSE2_PXOR_XMM_to_XMM(t3reg, t3reg); - SSE2_PCMPGTD_XMM_to_XMM(t2reg, t0reg); - SSE2_PCMPGTD_XMM_to_XMM(t3reg, t1reg); - SSE2_PUNPCKLDQ_XMM_to_XMM(t0reg, t2reg); - SSE2_PUNPCKLDQ_XMM_to_XMM(t1reg, t3reg); - SSE2_PADDQ_XMM_to_XMM(t0reg, t1reg); - SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_S); - SSE2_PUNPCKHDQ_XMM_to_XMM(t1reg, t2reg); - SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_T); - SSE2_PUNPCKHDQ_XMM_to_XMM(t2reg, t3reg); - } - SSE2_PADDQ_XMM_to_XMM(t1reg, t2reg); - /* - t0reg = { Rs[0]+Rt[0], Rs[1]+Rt[1] } - t1reg = { Rs[2]+Rt[2], Rs[3]+Rt[3] } - */ + // The idea is: + // s = x + y; (wrap-arounded) + // if Sign(x) == Sign(y) && Sign(s) != Sign(x) && Sign(x) == 0 then positive overflow (clamp with 0x7fffffff) + // if Sign(x) == Sign(y) && Sign(s) != Sign(x) && Sign(x) == 1 then negative overflow (clamp with 0x80000000) - SSEX_MOVDQA_XMM_to_XMM(t2reg, t0reg); - SSE_SHUFPS_XMM_to_XMM(t2reg, t1reg, 0xdd); - SSE2_PSRAD_I8_to_XMM(t2reg, 31); - /* - t2reg = { (Rs[0]+Rt[0]) < 0 ? 0xFFFFFFFF : 0, - (Rs[1]+Rt[1]) < 0 ? 0xFFFFFFFF : 0, - (Rs[2]+Rt[2]) < 0 ? 0xFFFFFFFF : 0, - (Rs[3]+Rt[3]) < 0 ? 0xFFFFFFFF : 0 } - */ + // get sign bit + SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); + SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); + SSE2_PSRLD_I8_to_XMM(t0reg, 31); + SSE2_PSRLD_I8_to_XMM(t1reg, 31); - SSE2_PSHUFD_XMM_to_XMM(t3reg, t2reg, 0x50); - SSE2_PXOR_XMM_to_XMM(t0reg, t3reg); - SSE2_PSRLQ_I8_to_XMM(t3reg, 63); - SSE2_PADDQ_XMM_to_XMM(t0reg, t3reg); - /* - t0reg = { abs(Rs[0]+Rt[0]), abs(Rs[1]+Rt[1]) } - */ - SSE2_PSHUFD_XMM_to_XMM(t3reg, t2reg, 0xfa); - SSE2_PXOR_XMM_to_XMM(t1reg, t3reg); - SSE2_PSRLQ_I8_to_XMM(t3reg, 63); - SSE2_PADDQ_XMM_to_XMM(t1reg, t3reg); - /* - t1reg = { abs(Rs[2]+Rt[2]), abs(Rs[3]+Rt[3]) } - */ - SSE2_PSLLQ_I8_to_XMM(t0reg, 1); - SSE2_PSLLQ_I8_to_XMM(t1reg, 1); - SSE2_PCMPEQB_XMM_to_XMM(t3reg, t3reg); - SSE2_PSRLD_I8_to_XMM(t3reg, 1); - SSE2_PXOR_XMM_to_XMM(t2reg, t3reg); - SSE_SHUFPS_XMM_to_XMM(t0reg, t1reg, 0xdd); - SSE2_PXOR_XMM_to_XMM(t1reg, t1reg); - SSE2_PCMPEQD_XMM_to_XMM(t1reg, t0reg); - /* - t1reg = { abs(Rs[0]+Rt[0]) > 0x7FFFFFFF ? 0 : 0xFFFFFFFF, - abs(Rs[1]+Rt[1]) > 0x7FFFFFFF ? 0 : 0xFFFFFFFF, - abs(Rs[2]+Rt[2]) > 0x7FFFFFFF ? 0 : 0xFFFFFFFF, - abs(Rs[3]+Rt[3]) > 0x7FFFFFFF ? 0 : 0xFFFFFFFF } - t2reg = { (Rs[0]+Rt[0]) < 0 ? 0x80000000 : 0x7FFFFFFF, - (Rs[1]+Rt[1]) < 0 ? 0x80000000 : 0x7FFFFFFF, - (Rs[2]+Rt[2]) < 0 ? 0x80000000 : 0x7FFFFFFF, - (Rs[3]+Rt[3]) < 0 ? 0x80000000 : 0x7FFFFFFF } - */ + // normal addition if( EEREC_D == EEREC_S ) SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T); } - SSE2_PAND_XMM_to_XMM(EEREC_D, t1reg); - SSE2_PANDN_XMM_to_XMM(t1reg, t2reg); - SSE2_POR_XMM_to_XMM(EEREC_D, t1reg); - /* - Rd = { t1reg[0] ? Rs[0]+Rt[0] : t2reg[0], - t1reg[1] ? Rs[1]+Rt[1] : t2reg[1], - t1reg[2] ? Rs[2]+Rt[2] : t2reg[2], - t1reg[3] ? Rs[3]+Rt[3] : t2reg[3] } - */ + + // overflow check + // t2reg = 0xffffffff if overflow, else 0 + SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_D); + SSE2_PSRLD_I8_to_XMM(t2reg, 31); + SSE2_PCMPEQD_XMM_to_XMM(t1reg, t0reg); // Sign(Rs) == Sign(Rt) + SSE2_PCMPEQD_XMM_to_XMM(t2reg, t0reg); // Sign(Rs) == Sign(Rd) + SSE2_PANDN_XMM_to_XMM(t2reg, t1reg); // (Sign(Rs) == Sign(Rt)) & ~(Sign(Rs) == Sign(Rd)) + SSE2_PCMPEQD_XMM_to_XMM(t1reg, t1reg); + SSE2_PSRLD_I8_to_XMM(t1reg, 1); // 0x7fffffff + SSE2_PADDD_XMM_to_XMM(t1reg, t0reg); // t1reg = (Rs < 0) ? 0x80000000 : 0x7fffffff + + // saturation + SSE2_PAND_XMM_to_XMM(t1reg, t2reg); + SSE2_PANDN_XMM_to_XMM(t2reg, EEREC_D); + SSE2_POR_XMM_to_XMM(t1reg, t2reg); + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, t1reg); + _freeXMMreg(t0reg); _freeXMMreg(t1reg); _freeXMMreg(t2reg); - _freeXMMreg(t3reg); CPU_SSE_XMMCACHE_END if( _Rd_ ) _deleteEEreg(_Rd_, 0); @@ -1210,71 +1157,49 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT, -1); - int t3reg = _allocTempXMMreg(XMMT_INT, -1); - if ( cpucaps.hasStreamingSIMD4Extensions ) { - SSE4_PMOVSXDQ_XMM_to_XMM(t0reg, EEREC_S); - SSE4_PMOVSXDQ_XMM_to_XMM(t1reg, EEREC_T); - SSE2_PSUBQ_XMM_to_XMM(t0reg, t1reg); - SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, 0x0e); - SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, 0x0e); - SSE4_PMOVSXDQ_XMM_to_XMM(t1reg, t1reg); - SSE4_PMOVSXDQ_XMM_to_XMM(t2reg, t2reg); - } - else { - SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); - SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); - SSE2_PXOR_XMM_to_XMM(t2reg, t2reg); - SSE2_PXOR_XMM_to_XMM(t3reg, t3reg); - SSE2_PCMPGTD_XMM_to_XMM(t2reg, t0reg); - SSE2_PCMPGTD_XMM_to_XMM(t3reg, t1reg); - SSE2_PUNPCKLDQ_XMM_to_XMM(t0reg, t2reg); - SSE2_PUNPCKLDQ_XMM_to_XMM(t1reg, t3reg); - SSE2_PSUBQ_XMM_to_XMM(t0reg, t1reg); - SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_S); - SSE2_PUNPCKHDQ_XMM_to_XMM(t1reg, t2reg); - SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_T); - SSE2_PUNPCKHDQ_XMM_to_XMM(t2reg, t3reg); - } - SSE2_PSUBQ_XMM_to_XMM(t1reg, t2reg); + // The idea is: + // s = x - y; (wrap-arounded) + // if Sign(x) != Sign(y) && Sign(s) != Sign(x) && Sign(x) == 0 then positive overflow (clamp with 0x7fffffff) + // if Sign(x) != Sign(y) && Sign(s) != Sign(x) && Sign(x) == 1 then negative overflow (clamp with 0x80000000) - SSEX_MOVDQA_XMM_to_XMM(t2reg, t0reg); - SSE_SHUFPS_XMM_to_XMM(t2reg, t1reg, 0xdd); - SSE2_PSRAD_I8_to_XMM(t2reg, 31); + // get sign bit + SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); + SSEX_MOVDQA_XMM_to_XMM(t1reg, EEREC_T); + SSE2_PSRLD_I8_to_XMM(t0reg, 31); + SSE2_PSRLD_I8_to_XMM(t1reg, 31); - SSE2_PSHUFD_XMM_to_XMM(t3reg, t2reg, 0x50); - SSE2_PXOR_XMM_to_XMM(t0reg, t3reg); - SSE2_PSRLQ_I8_to_XMM(t3reg, 63); - SSE2_PADDQ_XMM_to_XMM(t0reg, t3reg); - SSE2_PSHUFD_XMM_to_XMM(t3reg, t2reg, 0xfa); - SSE2_PXOR_XMM_to_XMM(t1reg, t3reg); - SSE2_PSRLQ_I8_to_XMM(t3reg, 63); - SSE2_PADDQ_XMM_to_XMM(t1reg, t3reg); - SSE2_PSLLQ_I8_to_XMM(t0reg, 1); - SSE2_PSLLQ_I8_to_XMM(t1reg, 1); - SSE2_PCMPEQB_XMM_to_XMM(t3reg, t3reg); - SSE2_PSRLD_I8_to_XMM(t3reg, 1); - SSE2_PXOR_XMM_to_XMM(t2reg, t3reg); - SSE_SHUFPS_XMM_to_XMM(t0reg, t1reg, 0xdd); - SSE2_PXOR_XMM_to_XMM(t1reg, t1reg); - SSE2_PCMPEQD_XMM_to_XMM(t1reg, t0reg); + // normal subtraction if( EEREC_D == EEREC_S ) SSE2_PSUBD_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { - SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_T); + SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_T); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); - SSE2_PSUBD_XMM_to_XMM(EEREC_D, t0reg); + SSE2_PSUBD_XMM_to_XMM(EEREC_D, t2reg); } else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBD_XMM_to_XMM(EEREC_D, EEREC_T); } - SSE2_PAND_XMM_to_XMM(EEREC_D, t1reg); - SSE2_PANDN_XMM_to_XMM(t1reg, t2reg); - SSE2_POR_XMM_to_XMM(EEREC_D, t1reg); + + // overflow check + // t2reg = 0xffffffff if NOT overflow, else 0 + SSEX_MOVDQA_XMM_to_XMM(t2reg, EEREC_D); + SSE2_PSRLD_I8_to_XMM(t2reg, 31); + SSE2_PCMPEQD_XMM_to_XMM(t1reg, t0reg); // Sign(Rs) == Sign(Rt) + SSE2_PCMPEQD_XMM_to_XMM(t2reg, t0reg); // Sign(Rs) == Sign(Rd) + SSE2_POR_XMM_to_XMM(t2reg, t1reg); // (Sign(Rs) == Sign(Rt)) | (Sign(Rs) == Sign(Rd)) + SSE2_PCMPEQD_XMM_to_XMM(t1reg, t1reg); + SSE2_PSRLD_I8_to_XMM(t1reg, 1); // 0x7fffffff + SSE2_PADDD_XMM_to_XMM(t1reg, t0reg); // t1reg = (Rs < 0) ? 0x80000000 : 0x7fffffff + + // saturation + SSE2_PAND_XMM_to_XMM(EEREC_D, t2reg); + SSE2_PANDN_XMM_to_XMM(t2reg, t1reg); + SSE2_POR_XMM_to_XMM(EEREC_D, t2reg); + _freeXMMreg(t0reg); _freeXMMreg(t1reg); _freeXMMreg(t2reg); - _freeXMMreg(t3reg); CPU_SSE_XMMCACHE_END if( _Rd_ ) _deleteEEreg(_Rd_, 0); diff --git a/pcsx2/x86/iPsxMem.cpp b/pcsx2/x86/iPsxMem.cpp index 4fa4003107..55b6384a0f 100644 --- a/pcsx2/x86/iPsxMem.cpp +++ b/pcsx2/x86/iPsxMem.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "PsxCommon.h" +#include "IopCommon.h" #include "iR3000A.h" #include "VU.h" diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 6a0c6af8dd..4fa5df94cd 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -30,7 +30,7 @@ #include #endif -#include "PsxCommon.h" +#include "IopCommon.h" #include "VU.h" #include "iCore.h" diff --git a/pcsx2/x86/iR3000Atables.cpp b/pcsx2/x86/iR3000Atables.cpp index 6fd3885604..ef66b115dc 100644 --- a/pcsx2/x86/iR3000Atables.cpp +++ b/pcsx2/x86/iR3000Atables.cpp @@ -19,7 +19,7 @@ #include "PrecompiledHeader.h" #include -#include "PsxCommon.h" +#include "IopCommon.h" #include "iR3000A.h" #include "IopMem.h" #include "IopDma.h" @@ -97,7 +97,7 @@ PSXRECOMPILE_CONSTCODE1(SLTI); //// SLTIU void rpsxSLTIU_const() { - g_psxConstRegs[_Rt_] = g_psxConstRegs[_Rs_] < _ImmU_; + g_psxConstRegs[_Rt_] = g_psxConstRegs[_Rs_] < (u32)_Imm_; } void rpsxSLTUconst(int info, int dreg, int sreg, int imm) diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 1ea2e87c1b..7cf66ef529 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -698,19 +698,10 @@ void* SuperVUGetProgram(u32 startpc, int vuindex) bool VuFunctionHeader::IsSame(void* pmem) { #ifdef SUPERVU_CACHING - //u32 checksum[2]; vector::iterator it; - FORIT(it, ranges) { - //memxor_mmx(checksum, (u8*)pmem+it->start, it->size); - //if( checksum[0] != it->checksum[0] || checksum[1] != it->checksum[1] ) - // return false; - // memcmp_mmx doesn't work on x86-64 machines - // and neither does pcsx2. -//#if defined(_MSC_VER) - if( memcmp_mmx((u8*)pmem+it->start, it->pmem, it->size) ) -//#else -// if( memcmp((u8*)pmem+it->start, it->pmem, it->size) ) -//#endif + FORIT(it, ranges) + { + if( memcmp_mmx((u8*)pmem+it->start, it->pmem, it->size) ) return false; } #endif @@ -3022,7 +3013,6 @@ void VuInstruction::Recompile(list::iterator& itinst, u32 vuxyz) CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); u8* jptr = JNE8(0); MOV32MtoR(EAX, pparentinst->pClipWrite); - MOV32ItoM(pparentinst->pClipWrite, 0); MOV32RtoM(s_ClipRead, EAX); x86SetJ8(jptr); } diff --git a/pcsx2/x86/iVif.cpp b/pcsx2/x86/iVif.cpp index f6e4fcfeac..64d63a99f9 100644 --- a/pcsx2/x86/iVif.cpp +++ b/pcsx2/x86/iVif.cpp @@ -29,11 +29,6 @@ extern u32 g_vif1Masks[48], g_vif0Masks[48]; extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4]; -//static const u32 writearr[4] = { 0xffffffff, 0, 0, 0 }; -//static const u32 rowarr[4] = { 0, 0xffffffff, 0, 0 }; -//static const u32 colarr[4] = { 0, 0, 0xffffffff, 0 }; -//static const u32 updatearr[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0 }; - // arranged in writearr, rowarr, colarr, updatearr static PCSX2_ALIGNED16(u32 s_maskarr[16][4]) = { 0xffffffff, 0x00000000, 0x00000000, 0xffffffff, @@ -58,8 +53,6 @@ extern u8 s_maskwrite[256]; extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0}; -//#if defined(_MSC_VER) - void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask) { u32 i; @@ -67,7 +60,7 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask) FreezeXMMRegs(1); for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) { - prev |= s_maskwrite[mask&0xff];//((mask&3)==3)||((mask&0xc)==0xc)||((mask&0x30)==0x30)||((mask&0xc0)==0xc0); + prev |= s_maskwrite[mask&0xff]; hasmask[i] = prev; if( (mask&0xff) != (oldmask&0xff) ) { @@ -93,46 +86,3 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask) } FreezeXMMRegs(0); } - - -/*#else // gcc -// Is this really supposed to be assembly for gcc and C for Windows? -void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask) -{ - u32 i; - u32 prev = 0; - FreezeXMMRegs(1); - - for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) { - - prev |= s_maskwrite[mask&0xff];//((mask&3)==3)||((mask&0xc)==0xc)||((mask&0x30)==0x30)||((mask&0xc0)==0xc0); - hasmask[i] = prev; - - if( (mask&0xff) != (oldmask&0xff) ) { - u8* p0 = (u8*)&s_maskarr[mask&15][0]; - u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0]; - - __asm__(".intel_syntax noprefix\n" - "movaps xmm0, [%0]\n" - "movaps xmm1, [%1]\n" - "movaps xmm2, xmm0\n" - "punpcklwd xmm0, xmm0\n" - "punpckhwd xmm2, xmm2\n" - "movaps xmm3, xmm1\n" - "punpcklwd xmm1, xmm1\n" - "punpckhwd xmm3, xmm3\n" - "movq [%2], xmm0\n" - "movq [%2+8], xmm1\n" - "movhps [%2+16], xmm0\n" - "movhps [%2+24], xmm1\n" - "movq [%2+32], xmm2\n" - "movq [%2+40], xmm3\n" - "movhps [%2+48], xmm2\n" - "movhps [%2+56], xmm3\n" - ".att_syntax\n" : : "r"(p0), "r"(p1), "r"(vif1masks) ); - } - } - FreezeXMMRegs(0); -} - -#endif*/ diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index d5c054c039..2a8411d4cf 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -444,7 +444,7 @@ int _getFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (mmxregs[i].reg >= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { // mmxregs[i] is unsigned, and MMX_GPR == 0, so the first part is always true. if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR] & (EEINST_LIVE0|EEINST_LIVE1)) ) { _freeMMXreg(i); return i; diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 1843666bcf..029a69b677 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -613,9 +613,12 @@ void recStep( void ) { static __forceinline bool recEventTest() { #ifdef PCSX2_DEVBUILD - // dont' remove this check unless doing an official release - if( g_globalXMMSaved || g_globalMMXSaved) + // dont' remove this check unless doing an official release + if( g_globalXMMSaved || g_globalMMXSaved) + { DevCon::Error("Pcsx2 Foopah! Frozen regs have not been restored!!!"); + DevCon::Error("g_globalXMMSaved = %d,g_globalMMXSaved = %d",params g_globalXMMSaved, g_globalMMXSaved); + } assert( !g_globalXMMSaved && !g_globalMMXSaved); #endif @@ -1198,11 +1201,12 @@ u32 eeScaleBlockCycles() jNO_DEFAULT } - s_nBlockCycles *= + const u32 temp = s_nBlockCycles * ( (s_nBlockCycles <= (10<<3)) ? scalarLow : - ((s_nBlockCycles > (21<<3)) ? scalarHigh : scalarMid ); + ((s_nBlockCycles > (21<<3)) ? scalarHigh : scalarMid ) + ); - return s_nBlockCycles >> (3+2); + return temp >> (3+2); } // Generates dynarec code for Event tests followed by a block dispatch (branch). diff --git a/pcsx2/x86/ix86-32/iR5900AritImm.cpp b/pcsx2/x86/ix86-32/iR5900AritImm.cpp index 89500896c4..4d805768e3 100644 --- a/pcsx2/x86/ix86-32/iR5900AritImm.cpp +++ b/pcsx2/x86/ix86-32/iR5900AritImm.cpp @@ -486,19 +486,6 @@ void recADDIU( void ) //////////////////////////////////////////////////// void recDADDI( void ) { -#ifdef __x86_64_ - if ( ! _Rt_ ) - { - return; - } - - MOV64MtoR( RAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD64ItoR( EAX, _Imm_ ); - } - MOV64RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX ); -#else if ( ! _Rt_ ) { return; @@ -520,7 +507,6 @@ void recDADDI( void ) } MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); -#endif } //////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_macros.h b/pcsx2/x86/ix86/ix86_macros.h index 8265141040..ea37192104 100644 --- a/pcsx2/x86/ix86/ix86_macros.h +++ b/pcsx2/x86/ix86/ix86_macros.h @@ -950,6 +950,9 @@ #define SSSE3_PABSW_XMM_to_XMM eSSSE3_PABSW_XMM_to_XMM<_EmitterId_> #define SSSE3_PABSD_XMM_to_XMM eSSSE3_PABSD_XMM_to_XMM<_EmitterId_> #define SSSE3_PALIGNR_XMM_to_XMM eSSSE3_PALIGNR_XMM_to_XMM<_EmitterId_> +#define SSSE3_PSIGNB_XMM_to_XMM eSSSE3_PSIGNB_XMM_to_XMM<_EmitterId_> +#define SSSE3_PSIGNW_XMM_to_XMM eSSSE3_PSIGNW_XMM_to_XMM<_EmitterId_> +#define SSSE3_PSIGND_XMM_to_XMM eSSSE3_PSIGND_XMM_to_XMM<_EmitterId_> //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -963,6 +966,7 @@ #define SSE4_BLENDVPS_XMM_to_XMM eSSE4_BLENDVPS_XMM_to_XMM<_EmitterId_> #define SSE4_BLENDVPS_M128_to_XMM eSSE4_BLENDVPS_M128_to_XMM<_EmitterId_> #define SSE4_PMOVSXDQ_XMM_to_XMM eSSE4_PMOVSXDQ_XMM_to_XMM<_EmitterId_> +#define SSE4_PMOVZXDQ_XMM_to_XMM eSSE4_PMOVZXDQ_XMM_to_XMM<_EmitterId_> #define SSE4_PINSRD_R32_to_XMM eSSE4_PINSRD_R32_to_XMM<_EmitterId_> #define SSE4_PMAXSD_XMM_to_XMM eSSE4_PMAXSD_XMM_to_XMM<_EmitterId_> #define SSE4_PMINSD_XMM_to_XMM eSSE4_PMINSD_XMM_to_XMM<_EmitterId_> diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl index 87c2c6d764..971a33af17 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.inl @@ -1224,6 +1224,30 @@ emitterT void eSSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 write8(imm8); } +emitterT void eSSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x08380F); + ModRM(3, to, from); +} + +emitterT void eSSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x09380F); + ModRM(3, to, from); +} + +emitterT void eSSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x0A380F); + ModRM(3, to, from); +} + // SSE4.1 emitterT void eSSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) @@ -1295,6 +1319,14 @@ emitterT void eSSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) ModRM(3, to, from); } +emitterT void eSSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x35380F); + ModRM(3, to, from); +} + emitterT void eSSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) { write8(0x66); diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index f2c5bde936..1ea1f9eaa9 100644 --- a/pcsx2/x86/ix86/ix86_tools.cpp +++ b/pcsx2/x86/ix86/ix86_tools.cpp @@ -72,15 +72,23 @@ void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) extern "C" { #endif + +__forceinline void FreezeRegs(int save) +{ + FreezeXMMRegs(save); + FreezeMMXRegs(save); +} + __forceinline void FreezeMMXRegs_(int save) { + //DevCon::Notice("FreezeMMXRegs_(%d); [%d]\n", save, g_globalMMXSaved); assert( g_EEFreezeRegs ); if( save ) { g_globalMMXSaved++; if( g_globalMMXSaved>1 ) { - //SysPrintf("MMX Already Saved!\n"); + //DevCon::Notice("MMX Already Saved!\n"); return; } @@ -117,7 +125,7 @@ __forceinline void FreezeMMXRegs_(int save) else { if( g_globalMMXSaved==0 ) { - //SysPrintf("MMX Not Saved!\n"); + //DevCon::Notice("MMX Not Saved!\n"); return; } g_globalMMXSaved--; @@ -159,14 +167,14 @@ __forceinline void FreezeMMXRegs_(int save) // XMM Register Freezing __forceinline void FreezeXMMRegs_(int save) { - //SysPrintf("FreezeXMMRegs_(%d); [%d]\n", save, g_globalXMMSaved); + //DevCon::Notice("FreezeXMMRegs_(%d); [%d]\n", save, g_globalXMMSaved); assert( g_EEFreezeRegs ); if( save ) { g_globalXMMSaved++; if( g_globalXMMSaved > 1 ){ - //SysPrintf("XMM Already saved\n"); + //DevCon::Notice("XMM Already saved\n"); return; } @@ -204,7 +212,7 @@ __forceinline void FreezeXMMRegs_(int save) { if( g_globalXMMSaved==0 ) { - //SysPrintf("XMM Regs not saved!\n"); + //DevCon::Notice("XMM Regs not saved!\n"); return; } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index eaeeadd90e..2ce53e7a8c 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -43,6 +43,16 @@ PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xb PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652}; PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7}; PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb}; +PCSX2_ALIGNED16(const u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4}; +PCSX2_ALIGNED16(const u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e}; +PCSX2_ALIGNED16(const u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f}; +PCSX2_ALIGNED16(const u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14}; +PCSX2_ALIGNED16(const u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8}; +PCSX2_ALIGNED16(const u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4}; +PCSX2_ALIGNED16(const u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff}; +PCSX2_ALIGNED16(const u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553}; +PCSX2_ALIGNED16(const u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510}; +PCSX2_ALIGNED16(const u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac}; PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0}; PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0}; PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0}; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index d31d659694..dcae7f4377 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -100,11 +100,13 @@ struct microProgManager { }; struct microVU { - int index; // VU Index (VU0 or VU1) + u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size u32 progSize; // VU Micro Program Size (microSize/8) u32 cacheAddr; // VU Cache Start Address static const u32 cacheSize = 0x400000; // VU Cache Size + + microProgManager<0x800> prog; // Micro Program Data VURegs* regs; // VU Regs Struct u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) @@ -122,7 +124,6 @@ struct microVU { uptr x86esi; // Source register. Used as a pointer to a source in stream operations. uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations. */ - microProgManager<0x800> prog; // Micro Program Data }; // microVU rec structs diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index af260661be..ccea38603b 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -36,14 +36,14 @@ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, _X_Y_Z_W); \ } -#define getZeroSS(reg) { \ +#define getZero(reg) { \ if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], _X_Y_Z_W); } \ else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ } -#define getZero(reg) { \ - if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], _X_Y_Z_W); } \ - else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ +#define getReg6(reg, _reg_) { \ + if (!_reg_) { getZero(reg); } \ + else { getReg(reg, _reg_); } \ } microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { @@ -51,26 +51,9 @@ microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { Fs = xmmFs; Ft = xmmFt; Fd = xmmFs; - if (_XYZW_SS) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - - if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZeroSS(Ft); } - else { getReg(Ft, _Ft_); } - } - } - else { - if (!_Fs_) { getZero(Fs); } - else { getReg(Fs, _Fs_); } - - if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZero(Ft); } - else { getReg(Ft, _Ft_); } - } - } + getReg6(Fs, _Fs_); + if (_Ft_ == _Fs_) { Ft = Fs; } + else { getReg6(Ft, _Ft_); } } microVUt(void) mVUallocFMAC1b(int& Fd) { @@ -88,14 +71,7 @@ microVUt(void) mVUallocFMAC2a(int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; Ft = xmmFs; - if (_XYZW_SS) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero(Fs); } - else { getReg(Fs, _Fs_); } - } + getReg6(Fs, _Fs_); } microVUt(void) mVUallocFMAC2b(int& Ft) { @@ -139,21 +115,15 @@ microVUt(void) mVUallocFMAC3a(int& Fd, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmFs; if (_XYZW_SS) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - + getReg6(Fs, _Fs_); if ( (_Ft_ == _Fs_) && ((_X && _bc_x) || (_Y && _bc_y) || (_Z && _bc_w) || (_W && _bc_w)) ) { Ft = Fs; } - else { - if (!_Ft_) { getZero3SS(Ft); } - else { getReg3SS(Ft, _Ft_); } - } + else if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } } else { - if (!_Fs_) { getZero(Fs); } - else { getReg(Fs, _Fs_); } - + getReg6(Fs, _Fs_); if (!_Ft_) { getZero3(Ft); } else { getReg3(Ft, _Ft_); } } @@ -188,24 +158,17 @@ microVUt(void) mVUallocFMAC4a(int& ACC, int& Fs, int& Ft) { Ft = xmmFt; getACC(ACC); if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - + getReg6(Fs, _Fs_); if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZeroSS(Ft); } - else { getReg(Ft, _Ft_); } - } + else { getReg6(Ft, _Ft_); } } else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZero4(Ft); } - else { getReg4(Ft, _Ft_); } - } + else if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } } } @@ -225,16 +188,10 @@ microVUt(void) mVUallocFMAC5a(int& ACC, int& Fs, int& Ft) { Ft = xmmFt; getACC(ACC); if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - - if ( (_Ft_ == _Fs_) && _bc_x) { - Ft = Fs; - } - else { - if (!_Ft_) { getZero3SS(Ft); } - else { getReg3SS(Ft, _Ft_); } - } + getReg6(Fs, _Fs_); + if ((_Ft_ == _Fs_) && _bc_x) { Ft = Fs; } + else if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } } else { if (!_Fs_) { getZero4(Fs); } @@ -266,14 +223,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmFs; getIreg(Ft); - if (_XYZW_SS) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero(Fs); } - else { getReg(Fs, _Fs_); } - } + getReg6(Fs, _Fs_); } microVUt(void) mVUallocFMAC6b(int& Fd) { @@ -290,14 +240,9 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) { Ft = xmmFt; getACC(ACC); getIreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC7b(int& ACC, int& Fs) { @@ -315,24 +260,17 @@ microVUt(void) mVUallocFMAC8a(int& Fd, int&ACC, int& Fs, int& Ft) { Fd = xmmFs; ACC = xmmACC0 + readACC; if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - + getReg6(Fs, _Fs_); if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZeroSS(Ft); } - else { getReg(Ft, _Ft_); } - } + else { getReg6(Ft, _Ft_); } } else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZero4(Ft); } - else { getReg4(Ft, _Ft_); } - } + else if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } } } @@ -355,24 +293,17 @@ microVUt(void) mVUallocFMAC9a(int& Fd, int&ACC, int& Fs, int& Ft) { ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - + getReg6(Fs, _Fs_); if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZeroSS(Ft); } - else { getReg(Ft, _Ft_); } - } + else { getReg6(Ft, _Ft_); } } else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZero4(Ft); } - else { getReg4(Ft, _Ft_); } - } + else if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } } } @@ -394,16 +325,10 @@ microVUt(void) mVUallocFMAC10a(int& Fd, int& ACC, int& Fs, int& Ft) { Fd = xmmFs; ACC = xmmACC0 + readACC; if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - - if ( (_Ft_ == _Fs_) && _bc_x) { - Ft = Fs; - } - else { - if (!_Ft_) { getZero3SS(Ft); } - else { getReg3SS(Ft, _Ft_); } - } + getReg6(Fs, _Fs_); + if ( (_Ft_ == _Fs_) && _bc_x) { Ft = Fs; } + else if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } } else { if (!_Fs_) { getZero4(Fs); } @@ -430,16 +355,10 @@ microVUt(void) mVUallocFMAC11a(int& Fd, int& ACC, int& Fs, int& Ft) { ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - - if ( (_Ft_ == _Fs_) && _bc_x) { - Ft = Fs; - } - else { - if (!_Ft_) { getZero3SS(Ft); } - else { getReg3SS(Ft, _Ft_); } - } + getReg6(Fs, _Fs_); + if ( (_Ft_ == _Fs_) && _bc_x) { Ft = Fs; } + else if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } } else { if (!_Fs_) { getZero4(Fs); } @@ -465,14 +384,9 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int&ACC, int& Fs, int& Ft) { Fd = xmmFs; ACC = xmmACC0 + readACC; getIreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC12b(int& Fd) { @@ -491,14 +405,9 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int&ACC, int& Fs, int& Ft) { ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); getIreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC13b(int& Fd) { @@ -516,24 +425,17 @@ microVUt(void) mVUallocFMAC14a(int& ACCw, int&ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCr = xmmACC0 + readACC; if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - + getReg6(Fs, _Fs_); if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZeroSS(Ft); } - else { getReg(Ft, _Ft_); } - } + else { getReg6(Ft, _Ft_); } } else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } if (_Ft_ == _Fs_) { Ft = Fs; } - else { - if (!_Ft_) { getZero4(Ft); } - else { getReg4(Ft, _Ft_); } - } + else if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } } } @@ -570,16 +472,10 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int&ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCr = xmmACC0 + readACC; if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - - if ( (_Ft_ == _Fs_) && _bc_x) { - Ft = Fs; - } - else { - if (!_Ft_) { getZero3SS(Ft); } - else { getReg3SS(Ft, _Ft_); } - } + getReg6(Fs, _Fs_); + if ((_Ft_ == _Fs_) && _bc_x) { Ft = Fs; } + else if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } } else { if (!_Fs_) { getZero4(Fs); } @@ -624,8 +520,8 @@ microVUt(void) mVUallocFMAC18a(int& ACC, int& Fs, int& Ft) { if (!_Ft_) { getZero4(Ft); } else { getReg4(Ft, _Ft_); } - SSE_SHUFPS_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY - SSE_SHUFPS_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ + SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY + SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ } microVUt(void) mVUallocFMAC18b(int& ACC, int& Fs) { @@ -650,8 +546,8 @@ microVUt(void) mVUallocFMAC19a(int& Fd, int&ACC, int& Fs, int& Ft) { if (!_Ft_) { getZero4(Ft); } else { getReg4(Ft, _Ft_); } - SSE_SHUFPS_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY - SSE_SHUFPS_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ + SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY + SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ } microVUt(void) mVUallocFMAC19b(int& Fd) { @@ -669,14 +565,9 @@ microVUt(void) mVUallocFMAC20a(int& ACCw, int&ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCr = xmmACC0 + readACC; getIreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC20b(int& ACCw, int& Fs) { @@ -712,14 +603,7 @@ microVUt(void) mVUallocFMAC22a(int& Fd, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmFs; getQreg(Ft); - if (_XYZW_SS) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero(Fs); } - else { getReg(Fs, _Fs_); } - } + getReg6(Fs, _Fs_); } microVUt(void) mVUallocFMAC22b(int& Fd) { @@ -736,21 +620,15 @@ microVUt(void) mVUallocFMAC23a(int& ACC, int& Fs, int& Ft) { Ft = xmmFt; getACC(ACC); getQreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC23b(int& ACC, int& Fs) { mVUallocFMAC4b(ACC, Fs); } - //------------------------------------------------------------------ // FMAC24 - MADD FMAC Opcode Storing Result to Fd (Q Reg) //------------------------------------------------------------------ @@ -762,14 +640,9 @@ microVUt(void) mVUallocFMAC24a(int& Fd, int&ACC, int& Fs, int& Ft) { Fd = xmmFs; ACC = xmmACC0 + readACC; getQreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC24b(int& Fd) { @@ -788,14 +661,9 @@ microVUt(void) mVUallocFMAC25a(int& Fd, int&ACC, int& Fs, int& Ft) { ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); getQreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC25b(int& Fd) { @@ -813,14 +681,9 @@ microVUt(void) mVUallocFMAC26a(int& ACCw, int&ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCr = xmmACC0 + readACC; getQreg(Ft); - if (_XYZW_SS && _X) { - if (!_Fs_) { getZeroSS(Fs); } - else { getReg(Fs, _Fs_); } - } - else { - if (!_Fs_) { getZero4(Fs); } - else { getReg4(Fs, _Fs_); } - } + if (_XYZW_SS && _X) { getReg6(Fs, _Fs_); } + else if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } } microVUt(void) mVUallocFMAC26b(int& ACCw, int& Fs) { @@ -885,14 +748,14 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) { microVUt(void) mVUallocVIa(int GPRreg, int _reg_) { microVU* mVU = mVUx; if (_reg_ == 0) { XOR32RtoR(GPRreg, GPRreg); } - else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmxVI1 + (_reg_ - 1)); } + else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmVI(_reg_)); } else { MOVZX32M16toR(GPRreg, (uptr)&mVU->regs->VI[_reg_].UL); } } microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { microVU* mVU = mVUx; if (_reg_ == 0) { return; } - else if (_reg_ < 9) { MOVD32RtoMMX(mmxVI1 + (_reg_ - 1), GPRreg); } + else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); } else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); } } @@ -906,12 +769,23 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { } //------------------------------------------------------------------ -// Div/Sqrt/Rsqrt Allocator Helpers +// Lower Instruction Allocator Helpers //------------------------------------------------------------------ #define getReg5(reg, _reg_, _fxf_) { \ - mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \ - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _fxf_))); \ + if (!_reg_) { \ + if (_fxf_ < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); } \ + else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 3); } \ + } \ + else { \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _fxf_))); \ + } \ } +// Doesn't Clamp +#define getReg7(reg, _reg_) { \ + if (!_reg_) { getZero(reg); } \ + else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); } \ +} #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 43ae6e8d80..9a3579aae5 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -137,7 +137,7 @@ microVUf(void) mVU_RSQRT() { #define EATANhelper(addr) { \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ - SSE_MOVSS_XMM_to_XMM(xmmFt, xmmT1); \ + SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmT1); \ SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \ SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \ } @@ -147,7 +147,7 @@ microVUt(void) mVU_EATAN_() { // ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d) SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_T1); - SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFs); + SSE_MOVAPS_XMM_to_XMM(xmmT1, xmmFs); EATANhelper(mVU_T2); EATANhelper(mVU_T3); @@ -158,16 +158,15 @@ microVUt(void) mVU_EATAN_() { EATANhelper(mVU_T8); SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_Pi4); - SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); } microVUf(void) mVU_EATAN() { microVU* mVU = mVUx; if (recPass == 0) {} else { getReg5(xmmFs, _Fs_, _Fsf_); - SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - // ToDo: Can Be Optimized Further? (takes approximately (~125 cycles + mem access time) on a c2d) SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); @@ -180,9 +179,9 @@ microVUf(void) mVU_EATANxy() { microVU* mVU = mVUx; if (recPass == 0) {} else { - getReg5(xmmFs, _Fs_, 1); - getReg5(xmmFt, _Fs_, 0); - SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + getReg6(xmmFt, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one); @@ -196,9 +195,9 @@ microVUf(void) mVU_EATANxz() { microVU* mVU = mVUx; if (recPass == 0) {} else { - getReg5(xmmFs, _Fs_, 2); - getReg5(xmmFt, _Fs_, 0); - SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + getReg6(xmmFt, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); @@ -208,16 +207,188 @@ microVUf(void) mVU_EATANxz() { mVU_EATAN_(); } } -microVUf(void) mVU_EEXP() {} -microVUf(void) mVU_ELENG() {} -microVUf(void) mVU_ERCPR() {} -microVUf(void) mVU_ERLENG() {} -microVUf(void) mVU_ERSADD() {} -microVUf(void) mVU_ERSQRT() {} -microVUf(void) mVU_ESADD() {} -microVUf(void) mVU_ESIN() {} -microVUf(void) mVU_ESQRT() {} -microVUf(void) mVU_ESUM() {} +#define eexpHelper(addr) { \ + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ + SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmT1); \ + SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \ + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \ +} +microVUf(void) mVU_EEXP() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1); + SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); + + SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MULSS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MOVAPS_XMM_to_XMM(xmmT1, xmmFt); + SSE_MULSS_M32_to_XMM(xmmFt, (uptr)mVU_E2); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); + + eexpHelper(mVU_E3); + eexpHelper(mVU_E4); + eexpHelper(mVU_E5); + + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); + SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_E6); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1); + SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUt(void) mVU_sumXYZ() { + // regd.x = x ^ 2 + y ^ 2 + z ^ 2 + if( cpucaps.hasStreamingSIMD4Extensions ) { + SSE4_DPPS_XMM_to_XMM(xmmFs, xmmFs, 0x71); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + } + else { + SSE_MULPS_XMM_to_XMM(xmmFs, xmmFs); // wzyx ^ 2 + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFs, 0xe1); // wzyx -> wzxy + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFs, 0xD2); // wzxy -> wxyz + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + z ^ 2 + } +} +microVUf(void) mVU_ELENG() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ERCPR() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ERLENG() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ERSADD() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + //SSE_RCPSS_XMM_to_XMM(xmmPQ, xmmPQ); // Lower Precision is bad? + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ERSQRT() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ESADD() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +#define esinHelper(addr) { \ + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \ + SSE_MOVAPS_XMM_to_XMM(xmmFs, xmmT1); \ + SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \ + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \ +} +microVUf(void) mVU_ESIN() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + //SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant? + SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_MOVAPS_XMM_to_XMM(xmmT1, xmmFs); + SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MULSS_M32_to_XMM(xmmFs, (uptr)mVU_S2); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); + + esinHelper(mVU_S3); + esinHelper(mVU_S4); + + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); + SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ESQRT() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ESUM() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x1b); + SSE_ADDPS_XMM_to_XMM(xmmFs, xmmFt); + SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x01); + SSE_ADDSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} microVUf(void) mVU_FCAND() {} microVUf(void) mVU_FCEQ() {} @@ -302,8 +473,11 @@ microVUf(void) mVU_IADD() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - ADD16RtoR(gprT1, gprT2); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT2, _Ft_); + ADD16RtoR(gprT1, gprT2); + } + else ADD16RtoR(gprT1, gprT1); mVUallocVIb(gprT1, _Fd_); } } @@ -330,8 +504,10 @@ microVUf(void) mVU_IAND() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - AND32RtoR(gprT1, gprT2); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT2, _Ft_); + AND32RtoR(gprT1, gprT2); + } mVUallocVIb(gprT1, _Fd_); } } @@ -340,8 +516,10 @@ microVUf(void) mVU_IOR() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - OR32RtoR(gprT1, gprT2); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT2, _Ft_); + OR32RtoR(gprT1, gprT2); + } mVUallocVIb(gprT1, _Fd_); } } @@ -349,10 +527,16 @@ microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; if (recPass == 0) {} else { - mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - SUB16RtoR(gprT1, gprT2); - mVUallocVIb(gprT1, _Fd_); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT1, _Fs_); + mVUallocVIa(gprT2, _Ft_); + SUB16RtoR(gprT1, gprT2); + } + else if (!isMMX(_Fd_)) { + XOR32RtoR(gprT1, gprT1); + mVUallocVIb(gprT1, _Fd_); + } + else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); } } } microVUf(void) mVU_ISUBIU() { @@ -376,14 +560,9 @@ microVUf(void) mVU_IBNE() {} microVUf(void) mVU_JR() {} microVUf(void) mVU_JALR() {} -microVUf(void) mVU_ILW() {} -microVUf(void) mVU_ISW() {} -microVUf(void) mVU_ILWR() {} -microVUf(void) mVU_ISWR() {} - microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (recPass == 0) {} + if (recPass == 0) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -391,7 +570,7 @@ microVUf(void) mVU_MOVE() { } microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; - if (recPass == 0) {} + if (recPass == 0) { /*If (!_Ft_) nop();*/ } else { mVUallocVIa(gprT1, _Fs_); MOVSX32R16toR(gprT1, gprT1); @@ -402,7 +581,7 @@ microVUf(void) mVU_MFIR() { } microVUf(void) mVU_MFP() { microVU* mVU = mVUx; - if (recPass == 0) {} + if (recPass == 0) { /*If (!_Ft_) nop();*/ } else { getPreg(xmmFt); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -418,21 +597,208 @@ microVUf(void) mVU_MTIR() { } microVUf(void) mVU_MR32() { microVU* mVU = mVUx; - if (recPass == 0) {} + if (recPass == 0) { /*If (!_Ft_) nop();*/ } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 4 : 15); - if (_X_Y_Z_W != 8) { SSE_SHUFPS_XMM_to_XMM(xmmT1, xmmT1, 0x39); } + if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } } -microVUf(void) mVU_LQ() {} -microVUf(void) mVU_LQD() {} -microVUf(void) mVU_LQI() {} -microVUf(void) mVU_SQ() {} -microVUf(void) mVU_SQD() {} -microVUf(void) mVU_SQI() {} -//microVUf(void) mVU_LOI() {} +microVUf(void) mVU_ILW() { + microVU* mVU = mVUx; + if (recPass == 0) { /*If (!_Ft_) nop();*/ } + else { + if (!_Fs_) { + MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS ); + mVUallocVIb(gprT1, _Ft_); + } + else { + mVUallocVIa(gprT1, _Fs_); + ADD32ItoR(gprT1, _Imm11_); + mVUaddrFix(gprT1); + MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. + if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); + mVUallocVIb(gprT1, _Ft_); + } + } +} +microVUf(void) mVU_ILWR() { + microVU* mVU = mVUx; + if (recPass == 0) { /*If (!_Ft_) nop();*/ } + else { + if (!_Fs_) { + MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS ); + mVUallocVIb(gprT1, _Ft_); + } + else { + mVUallocVIa(gprT1, _Fs_); + mVUaddrFix(gprT1); + MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. + if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); + mVUallocVIb(gprT1, _Ft_); + } + } +} +microVUf(void) mVU_ISW() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Fs_) { + int imm = getVUmem(_Imm11_); + mVUallocVIa(gprT1, _Ft_); + if (_X) MOV32RtoM((uptr)mVU->regs->Mem + imm, gprT1); + if (_Y) MOV32RtoM((uptr)mVU->regs->Mem + imm + 4, gprT1); + if (_Z) MOV32RtoM((uptr)mVU->regs->Mem + imm + 8, gprT1); + if (_W) MOV32RtoM((uptr)mVU->regs->Mem + imm + 12, gprT1); + } + else { + mVUallocVIa(gprT1, _Fs_); + mVUallocVIa(gprT2, _Ft_); + ADD32ItoR(gprT1, _Imm11_); + mVUaddrFix(gprT1); + if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + } + } +} +microVUf(void) mVU_ISWR() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Fs_) { + mVUallocVIa(gprT1, _Ft_); + if (_X) MOV32RtoM((uptr)mVU->regs->Mem, gprT1); + if (_Y) MOV32RtoM((uptr)mVU->regs->Mem+4, gprT1); + if (_Z) MOV32RtoM((uptr)mVU->regs->Mem+8, gprT1); + if (_W) MOV32RtoM((uptr)mVU->regs->Mem+12, gprT1); + } + else { + mVUallocVIa(gprT1, _Fs_); + mVUallocVIa(gprT2, _Ft_); + mVUaddrFix(gprT1); + if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + } + } +} + +microVUf(void) mVU_LQ() { + microVU* mVU = mVUx; + if (recPass == 0) { /*If (!_Ft_) nop();*/ } + else { + if (!_Fs_) { + mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + } + else { + mVUallocVIa(gprT1, _Fs_); + ADD32ItoR(gprT1, _Imm11_); + mVUaddrFix(gprT1); + mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + } + } +} +microVUf(void) mVU_LQD() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Fs_ && _Ft_) { + mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + } + else { + mVUallocVIa(gprT1, _Fs_); + SUB16ItoR(gprT1, 1); + mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. + if (_Ft_) { + mVUaddrFix(gprT1); + mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + } + } + } +} +microVUf(void) mVU_LQI() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Fs_ && _Ft_) { + mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + } + else { + mVUallocVIa((_Ft_) ? gprT1 : gprT2, _Fs_); + if (_Ft_) { + MOV32RtoR(gprT2, gprT1); + mVUaddrFix(gprT1); + mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + } + ADD16ItoR(gprT2, 1); + mVUallocVIb(gprT2, _Fs_); // ToDo: Backup to memory check. + } + } +} +microVUf(void) mVU_SQ() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Ft_) { + getReg7(xmmFs, _Fs_); + mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); + } + else { + mVUallocVIa(gprT1, _Ft_); + ADD32ItoR(gprT1, _Imm11_); + mVUaddrFix(gprT1); + getReg7(xmmFs, _Fs_); + mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); + } + } +} +microVUf(void) mVU_SQD() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Ft_) { + getReg7(xmmFs, _Fs_); + mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W); + } + else { + mVUallocVIa(gprT1, _Ft_); + SUB16ItoR(gprT1, 1); + mVUallocVIb(gprT1, _Ft_); // ToDo: Backup to memory check. + mVUaddrFix(gprT1); + getReg7(xmmFs, _Fs_); + mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); + } + } +} +microVUf(void) mVU_SQI() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + if (!_Ft_) { + getReg7(xmmFs, _Fs_); + mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W); + } + else { + mVUallocVIa(gprT1, _Ft_); + MOV32RtoR(gprT2, gprT1); + mVUaddrFix(gprT1); + getReg7(xmmFs, _Fs_); + mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); + ADD16ItoR(gprT2, 1); + mVUallocVIb(gprT2, _Ft_); // ToDo: Backup to memory check. + } + } +} microVUf(void) mVU_RINIT() {} microVUf(void) mVU_RGET() {} diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index b30db341e9..a7cfc6330d 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -21,16 +21,11 @@ //------------------------------------------------------------------ // Global Variables //------------------------------------------------------------------ + PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]); -PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_4[4]); -PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_12[4]); -PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_15[4]); -PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_4[4]); -PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_12[4]); -PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T1[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T2[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T3[4]); @@ -40,6 +35,22 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_4[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_12[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_15[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_4[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_12[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); //------------------------------------------------------------------ // Helper Macros @@ -73,6 +84,9 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]); #define _Imm5_ (((mVU->code & 0x400) ? 0xfff0 : 0) | ((mVU->code >> 6) & 0xf)) #define _Imm15_ (((mVU->code >> 10) & 0x7800) | (mVU->code & 0x7ff)) +#define getVUmem(x) (((vuIndex == 1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16) +#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12))) + #define xmmT1 0 // Temp Reg #define xmmFs 1 // Holds the Value of Fs (writes back result Fd) #define xmmFt 2 // Holds the Value of Ft @@ -129,4 +143,7 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]); //#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13)) //#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14)) +#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) +#define mmVI(_VIreg_) (_VIreg_ - 1) + #include "microVU_Misc.inl" diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 3b1fe47866..e229a09dae 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -73,82 +73,113 @@ microVUx(void) mVUunpack_xyzw(int dstreg, int srcreg, int xyzw) { } } -microVUx(void) mVUloadReg(int reg, u32 offset, int xyzw) { +microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) { switch( xyzw ) { case 8: SSE_MOVSS_M32_to_XMM(reg, offset); break; // X case 4: SSE_MOVSS_M32_to_XMM(reg, offset+4); break; // Y case 2: SSE_MOVSS_M32_to_XMM(reg, offset+8); break; // Z case 1: SSE_MOVSS_M32_to_XMM(reg, offset+12); break; // W - //case 3: SSE_MOVHPS_M64_to_XMM(reg, offset+8); break; // ZW (not sure if this is faster than default) - //case 12: SSE_MOVLPS_M64_to_XMM(reg, offset); break; // XY (not sure if this is faster than default) default: SSE_MOVAPS_M128_to_XMM(reg, offset); break; } } -microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) { +microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { + switch( xyzw ) { + case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X + case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y + case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z + case 1: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W + default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break; + } +} + +microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) { switch ( xyzw ) { - case 1: // W - SSE_MOVSS_XMM_to_M32(offset+12, reg); - break; - case 2: // Z - SSE_MOVSS_XMM_to_M32(offset+8, reg); - break; - case 3: // ZW - SSE_MOVHPS_XMM_to_M64(offset+8, reg); - break; - case 4: // Y - SSE_MOVSS_XMM_to_M32(offset+4, reg); - break; - case 5: // YW - SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xB1); - SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_M32(offset+4, reg); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); - break; - case 6: // YZ - SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); - SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1); - break; - case 7: // YZW - SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW - SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); - break; - case 8: // X - SSE_MOVSS_XMM_to_M32(offset, reg); - break; - case 9: // XW - SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_M32(offset, reg); - if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); - else SSE_SHUFPS_XMM_to_XMM(xmmT1, xmmT1, 0x55); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); - break; - case 10: //XZ - SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_M32(offset, reg); - SSE_MOVSS_XMM_to_M32(offset+8, xmmT1); - break; - case 11: //XZW - SSE_MOVSS_XMM_to_M32(offset, reg); - SSE_MOVHPS_XMM_to_M64(offset+8, reg); - break; - case 12: // XY - SSE_MOVLPS_XMM_to_M64(offset, reg); - break; - case 13: // XYW - SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW - SSE_MOVHPS_XMM_to_M64(offset, xmmT1); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); - break; - case 14: // XYZ - SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVLPS_XMM_to_M64(offset, reg); - SSE_MOVSS_XMM_to_M32(offset+8, xmmT1); - break; - case 15: // XYZW - SSE_MOVAPS_XMM_to_M128(offset, reg); - break; + case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); + SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVSS_XMM_to_M32(offset+4, reg); + SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + break; // YW + case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); + SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1); + break; // YZ + case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW + SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1); + SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + break; // YZW + case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVSS_XMM_to_M32(offset, reg); + if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); + else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); + SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + break; // XW + case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVSS_XMM_to_M32(offset, reg); + SSE_MOVSS_XMM_to_M32(offset+8, xmmT1); + break; //XZ + case 11: SSE_MOVSS_XMM_to_M32(offset, reg); + SSE_MOVHPS_XMM_to_M64(offset+8, reg); + break; //XZW + case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW + SSE_MOVHPS_XMM_to_M64(offset, xmmT1); + SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + break; // XYW + case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVLPS_XMM_to_M64(offset, reg); + SSE_MOVSS_XMM_to_M32(offset+8, xmmT1); + break; // XYZ + case 8: SSE_MOVSS_XMM_to_M32(offset, reg); break; // X + case 4: SSE_MOVSS_XMM_to_M32(offset+4, reg); break; // Y + case 2: SSE_MOVSS_XMM_to_M32(offset+8, reg); break; // Z + case 1: SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // W + case 12: SSE_MOVLPS_XMM_to_M64(offset, reg); break; // XY + case 3: SSE_MOVHPS_XMM_to_M64(offset+8, reg); break; // ZW + default: SSE_MOVAPS_XMM_to_M128(offset, reg); break; // XYZW + } +} + +microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { + switch ( xyzw ) { + case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); + SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + break; // YW + case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); + SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); + break; // YZ + case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW + SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + break; // YZW + case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); + if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); + else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + break; // XW + case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + break; //XZ + case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); + SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); + break; //XZW + case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW + SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + break; // XYW + case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); + SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + break; // XYZ + case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X + case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y + case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z + case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W + case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY + case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW + default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW } } @@ -174,10 +205,10 @@ microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) { SSE2_MOVSD_XMM_to_XMM(dest, src); break; case 5: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8); + SSE2_PSHUFD_XMM_to_XMM(dest, dest, 0xd8); break; case 6: SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78); + SSE2_PSHUFD_XMM_to_XMM(dest, dest, 0x78); break; case 7: SSE_MOVSS_XMM_to_XMM(src, dest); SSE_MOVAPS_XMM_to_XMM(dest, src); @@ -185,10 +216,10 @@ microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) { case 8: SSE_MOVSS_XMM_to_XMM(dest, src); break; case 9: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2); + SSE2_PSHUFD_XMM_to_XMM(dest, dest, 0xd2); break; case 10: SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72); + SSE2_PSHUFD_XMM_to_XMM(dest, dest, 0x72); break; case 11: SSE_MOVSS_XMM_to_XMM(dest, src); SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); @@ -210,4 +241,23 @@ microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) { } } +// Transforms the Address in gprReg to valid VU0/VU1 Address +microVUt(void) mVUaddrFix(int gprReg) { + if ( vuIndex == 1 ) { + AND32ItoR(EAX, 0x3ff); // wrap around + SHL32ItoR(EAX, 4); + } + else { + u8 *jmpA, *jmpB; + CMP32ItoR(EAX, 0x400); + jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs + AND32ItoR(EAX, 0x43f); + jmpB = JMP8(0); + x86SetJ8(jmpA); + AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around + x86SetJ8(jmpB); + SHL32ItoR(EAX, 4); // multiply by 16 (shift left by 4) + } +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 9b5e3a5090..651ef92048 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -78,12 +78,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX // Helper Macros //------------------------------------------------------------------ +// FMAC1 - Normal FMAC Opcodes #define mVU_FMAC1(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC1a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -91,13 +91,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC1b(Fd); \ } \ } - +// FMAC3 - BC(xyzw) FMAC Opcodes #define mVU_FMAC3(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC3a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -105,13 +104,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC3b(Fd); \ } \ } - +// FMAC4 - FMAC Opcodes Storing Result to ACC #define mVU_FMAC4(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC4a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -119,13 +117,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC4b(ACC, Fs); \ } \ } - +// FMAC5 - FMAC BC(xyzw) Opcodes Storing Result to ACC #define mVU_FMAC5(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC5a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -133,13 +130,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC5b(ACC, Fs); \ } \ } - +// FMAC6 - Normal FMAC Opcodes (I Reg) #define mVU_FMAC6(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC6a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -147,13 +143,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC6b(Fd); \ } \ } - +// FMAC7 - FMAC Opcodes Storing Result to ACC (I Reg) #define mVU_FMAC7(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC7a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -161,13 +156,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC7b(ACC, Fs); \ } \ } - +// FMAC8 - MADD FMAC Opcode Storing Result to Fd #define mVU_FMAC8(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC8a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -181,13 +175,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC8b(Fd); \ } \ } - +// FMAC9 - MSUB FMAC Opcode Storing Result to Fd #define mVU_FMAC9(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC9a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -201,13 +194,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC9b(Fd); \ } \ } - +// FMAC10 - MADD FMAC BC(xyzw) Opcode Storing Result to Fd #define mVU_FMAC10(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC10a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -221,13 +213,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC10b(Fd); \ } \ } - +// FMAC11 - MSUB FMAC BC(xyzw) Opcode Storing Result to Fd #define mVU_FMAC11(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC11a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -241,13 +232,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC11b(Fd); \ } \ } - +// FMAC12 - MADD FMAC Opcode Storing Result to Fd (I Reg) #define mVU_FMAC12(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC12a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -261,13 +251,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC12b(Fd); \ } \ } - +// FMAC13 - MSUB FMAC Opcode Storing Result to Fd (I Reg) #define mVU_FMAC13(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC13a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -281,13 +270,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC13b(Fd); \ } \ } - +// FMAC14 - MADDA FMAC Opcode #define mVU_FMAC14(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC14a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -301,13 +289,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC14b(ACCw, Fs); \ } \ } - +// FMAC15 - MSUBA FMAC Opcode #define mVU_FMAC15(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC15a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -321,13 +308,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC15b(ACCw, ACCr); \ } \ } - +// FMAC16 - MADDA BC(xyzw) FMAC Opcode #define mVU_FMAC16(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC16a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -341,13 +327,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC16b(ACCw, Fs); \ } \ } - +// FMAC17 - MSUBA BC(xyzw) FMAC Opcode #define mVU_FMAC17(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC17a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -361,26 +346,24 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC17b(ACCw, ACCr); \ } \ } - +// FMAC18 - OPMULA FMAC Opcode #define mVU_FMAC18(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC18a(ACC, Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ mVUallocFMAC18b(ACC, Fs); \ } \ } - +// FMAC19 - OPMULA FMAC Opcode #define mVU_FMAC19(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC19a(Fd, ACC, Fs, Ft); \ SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ @@ -388,13 +371,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC19b(Fd); \ } \ } - +// FMAC20 - MADDA FMAC Opcode (I Reg) #define mVU_FMAC20(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC20a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -408,13 +390,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC20b(ACCw, Fs); \ } \ } - +// FMAC21 - MSUBA FMAC Opcode (I Reg) #define mVU_FMAC21(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC21a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -428,13 +409,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC21b(ACCw, ACCr); \ } \ } - +// FMAC22 - Normal FMAC Opcodes (Q Reg) #define mVU_FMAC22(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC22a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -442,13 +422,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC22b(Fd); \ } \ } - +// FMAC23 - FMAC Opcodes Storing Result to ACC (Q Reg) #define mVU_FMAC23(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC23a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -456,13 +435,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC23b(ACC, Fs); \ } \ } - +// FMAC24 - MADD FMAC Opcode Storing Result to Fd (Q Reg) #define mVU_FMAC24(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC24a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -476,13 +454,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC24b(Fd); \ } \ } - +// FMAC25 - MSUB FMAC Opcode Storing Result to Fd (Q Reg) #define mVU_FMAC25(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC25a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -496,13 +473,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC25b(Fd); \ } \ } - +// FMAC26 - MADDA FMAC Opcode (Q Reg) #define mVU_FMAC26(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC26a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -516,13 +492,12 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC26b(ACCw, Fs); \ } \ } - +// FMAC27 - MSUBA FMAC Opcode (Q Reg) #define mVU_FMAC27(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC27a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -546,7 +521,6 @@ microVUf(void) mVU_ABS() { if (recPass == 0) {} else { int Fs, Ft; - if (isNOP) return; mVUallocFMAC2a(Fs, Ft); SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip); mVUallocFMAC1b(Ft); @@ -646,7 +620,6 @@ microVUq(void) mVU_FTOIx(uptr addr) { if (recPass == 0) {} else { int Fs, Ft; - if (isNOP) return; mVUallocFMAC2a(Fs, Ft); // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() @@ -672,7 +645,6 @@ microVUq(void) mVU_ITOFx(uptr addr) { if (recPass == 0) {} else { int Fs, Ft; - if (isNOP) return; mVUallocFMAC2a(Fs, Ft); SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs);