From 1e4f48437fd85152b58be8b4d441581ff4964b54 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Tue, 31 Mar 2009 18:52:43 +0000 Subject: [PATCH 01/21] Shortcut for intc_stat reads. Speeds up games that use it a lot. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@877 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/HwRead.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pcsx2/HwRead.cpp b/pcsx2/HwRead.cpp index cf86b7cbbb..557a0dea46 100644 --- a/pcsx2/HwRead.cpp +++ b/pcsx2/HwRead.cpp @@ -214,12 +214,6 @@ static __forceinline mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) switch( mem ) { - case 0xf000: - if( intchack ) IntCHackCheck(); - // This one is checked alot, so leave it commented out unless you love 600 meg logfiles. - //HW_LOG("INTC_STAT Read 32bit %x", psHu32(0xf010)); - break; - case 0xf010: HW_LOG("INTC_MASK Read32, value=0x%x", psHu32(INTC_MASK)); break; @@ -261,12 +255,22 @@ static __forceinline mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) } } return 0; + + case 0xf000: + //Put this back on top in case you remove the shortcut for intc_stat register (see below function) (rama). + if( intchack ) IntCHackCheck(); + // This one is checked alot, so leave it commented out unless you love 600 meg logfiles. 
+ //HW_LOG("INTC_STAT Read 32bit %x", psHu32(0xf010)); + break; } return *((u32*)&PS2MEM_HW[mem]); } mem32_t __fastcall hwRead32_page_0F(u32 mem) { + if (mem == 0x1000f000) //shortcut for intc_stat + return *((u32*)&PS2MEM_HW[0xF000]); + return __hwRead32_page_0F( mem, false ); } From a547ef4cea98a0700c57e098f4d8c691529b70d7 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 1 Apr 2009 08:26:35 +0000 Subject: [PATCH 02/21] for april fools i can put on my next commit that microVU is finished and doubles fps but i guess i shouldn't do that on the svn :D or maybe i can >.> <.< >.> git-svn-id: http://pcsx2.googlecode.com/svn/trunk@878 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Lower.inl | 63 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 70e5b1832e..7d6d83a51b 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -23,18 +23,27 @@ // Micro VU Micromode Lower instructions //------------------------------------------------------------------ -#define testZero(xmmReg, xmmTemp, gprTemp) { \ - SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ - SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if each vector is zero */ \ - SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); /* Move the sign bits */ \ - TEST32ItoR(gprTemp, 1); /* Test "Is Zero" bit */ \ +#define testZero(xmmReg, xmmTemp, gprTemp) { \ + SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ + SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \ + SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); /* Move the sign bits */ \ + TEST32ItoR(gprTemp, 1); /* Test "Is Zero" bit */ \ +} + +#define testNeg(xmmReg, gprTemp, aJump) { \ + SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ + TEST32ItoR(gprTemp, 1); /* Check sign bit */ \ + aJump = JZ8(0); /* Skip if positive */ \ + MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set 
Invalid Flags */ \ + SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \ + x86SetJ8(aJump); \ } microVUf(void) mVU_DIV() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_); } else { - u8 *ajmp, *bjmp, *cjmp, *djmp; + u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFt, _Ft_, _Ftf_); @@ -69,17 +78,11 @@ microVUf(void) mVU_SQRT() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_); } else { - u8* ajmp; + u8 *ajmp; getReg5(xmmFt, _Ft_, _Ftf_); - MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags - /* Check for negative sqrt */ - SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmFt); - AND32ItoR(gprT1, 1); //Check sign - ajmp = JZ8(0); //Skip if none are - MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Invalid Flag - Negative number sqrt - SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt - x86SetJ8(ajmp); + MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags + testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive) SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt); @@ -92,39 +95,33 @@ microVUf(void) mVU_RSQRT() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_); } else { - u8 *ajmp8, *bjmp8, *cjmp8, *djmp8; + u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFt, _Ft_, _Ftf_); - MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags - /* Check for negative divide */ - SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); - AND32ItoR(gprT1, 1); //Check sign - ajmp8 = JZ8(0); //Skip if none are - MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Invalid Flag - Negative number sqrt - SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt - x86SetJ8(ajmp8); + MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags + testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt); 
testZero(xmmFt, xmmT1, gprT1); // Test if Ft is zero - ajmp8 = JZ8(0); // Skip if not zero + ajmp = JZ8(0); // Skip if not zero testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero - bjmp8 = JZ8(0); // Skip if none are + bjmp = JZ8(0); // Skip if none are MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0) - cjmp8 = JMP8(0); - x86SetJ8(bjmp8); + cjmp = JMP8(0); + x86SetJ8(bjmp); MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0) - x86SetJ8(cjmp8); + x86SetJ8(cjmp); SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit); SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // xmmFs = +/-Max - djmp8 = JMP8(0); - x86SetJ8(ajmp8); + djmp = JMP8(0); + x86SetJ8(ajmp); SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt); mVUclamp1(xmmFs, xmmFt, 8); - x86SetJ8(djmp8); + x86SetJ8(djmp); mVUunpack_xyzw(xmmFs, xmmFs, 0); mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8); @@ -184,7 +181,7 @@ microVUf(void) mVU_EATANxy() { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); - SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); // y-x, not y-1? >< SSE_ADDSS_XMM_to_XMM(xmmFt, xmmPQ); SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt); From 65a4061c3a89579ed5cdaf0d2399e5223b146468 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 1 Apr 2009 11:55:29 +0000 Subject: [PATCH 03/21] Still messing with Sif, Vif, and SPR. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@879 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Hw.cpp | 169 ++++++++++++++++++++++++------------------ pcsx2/Hw.h | 2 +- pcsx2/SPR.cpp | 54 +++++++------- pcsx2/Sif.cpp | 92 ++++++++++------------- pcsx2/Sif.h | 11 +-- pcsx2/Sifcmd.h | 189 ++++++++++++++++++++++++----------------------- pcsx2/VifDma.cpp | 6 +- 7 files changed, 266 insertions(+), 257 deletions(-) diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index 1b34b55f05..bb3087bfce 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -130,21 +130,17 @@ int hwMFIFOWrite(u32 addr, u8 *data, u32 size) { /* it does, so first copy 's1' bytes from 'data' to 'addr' */ dst = (u8*)PSM(addr); if (dst == NULL) return -1; - //Cpu->Clear(addr, s1/4); memcpy_fast(dst, data, s1); /* and second copy 's2' bytes from '&data[s1]' to 'maddr' */ dst = (u8*)PSM(psHu32(DMAC_RBOR)); if (dst == NULL) return -1; - //Cpu->Clear(psHu32(DMAC_RBOR), s2/4); memcpy_fast(dst, &data[s1], s2); - } else { - //u32 * tempptr, * tempptr2; - + } + else { /* it doesn't, so just copy 'size' bytes from 'data' to 'addr' */ dst = (u8*)PSM(addr); if (dst == NULL) return -1; - //Cpu->Clear(addr, size/4); memcpy_fast(dst, data, size); } @@ -157,7 +153,6 @@ int hwDmacSrcChainWithStack(DMACh *dma, int id) { switch (id) { case 0: // Refe - Transfer Packet According to ADDR field - //dma->tadr += 16; return 1; //End Transfer case 1: // CNT - Transfer QWC following the tag. 
@@ -184,7 +179,8 @@ int hwDmacSrcChainWithStack(DMACh *dma, int id) { if ((dma->chcr & 0x30) == 0x0) { //Check if ASR0 is empty dma->asr0 = dma->madr + (dma->qwc << 4); //If yes store Succeeding tag dma->chcr = (dma->chcr & 0xffffffcf) | 0x10; //1 Address in call stack - }else if((dma->chcr & 0x30) == 0x10){ + } + else if((dma->chcr & 0x30) == 0x10){ dma->chcr = (dma->chcr & 0xffffffcf) | 0x20; //2 Addresses in call stack dma->asr1 = dma->madr + (dma->qwc << 4); //If no store Succeeding tag in ASR1 }else { @@ -202,7 +198,8 @@ int hwDmacSrcChainWithStack(DMACh *dma, int id) { dma->chcr = (dma->chcr & 0xffffffcf) | 0x10; //1 Address left in call stack dma->tadr = dma->asr1; //Read ASR1 as next tag dma->asr1 = 0; //Clear ASR1 - } else { //If ASR1 is empty (No address held) + } + else { //If ASR1 is empty (No address held) if((dma->chcr & 0x30) == 0x10) { //Check if ASR0 is NOT equal to 0 (Contains address) dma->chcr = (dma->chcr & 0xffffffcf); //No addresses left in call stack dma->tadr = dma->asr0; //Read ASR0 as next tag @@ -216,8 +213,7 @@ int hwDmacSrcChainWithStack(DMACh *dma, int id) { case 7: // End - Transfer QWC following the tag dma->madr = dma->tadr + 16; //Set MADR to data following the tag - //comment out tadr fixes lemans - //dma->tadr = dma->madr + (dma->qwc << 4); //Dont Increment tag, breaks Soul Calibur II and III + //Dont Increment tadr, breaks Soul Calibur II and III return 1; //End Transfer } @@ -229,7 +225,6 @@ int hwDmacSrcChain(DMACh *dma, int id) { switch (id) { case 0: // Refe - Transfer Packet According to ADDR field - //dma->tadr += 16; return 1; //End Transfer case 1: // CNT - Transfer QWC following the tag. 
@@ -250,7 +245,7 @@ int hwDmacSrcChain(DMACh *dma, int id) { case 7: // End - Transfer QWC following the tag dma->madr = dma->tadr + 16; //Set MADR to data following the tag - //dma->tadr = dma->madr + (dma->qwc << 4); //Dont Increment tag, breaks Soul Calibur II and III + //Dont Increment tadr, breaks Soul Calibur II and III return 1; //End Transfer } @@ -467,18 +462,30 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) case GIF_CTRL: //Console::WriteLn("GIF_CTRL write %x", params value); psHu32(mem) = value & 0x8; - if (value & 0x1) gsGIFReset(); - else if( value & 8 ) psHu32(GIF_STAT) |= 8; - else psHu32(GIF_STAT) &= ~8; + + if (value & 0x1) + gsGIFReset(); + else if( value & 8 ) + psHu32(GIF_STAT) |= 8; + else + psHu32(GIF_STAT) &= ~8; + return; case GIF_MODE: // need to set GIF_MODE (hamster ball) psHu32(GIF_MODE) = value; - if (value & 0x1) psHu32(GIF_STAT)|= 0x1; - else psHu32(GIF_STAT)&= ~0x1; - if (value & 0x4) psHu32(GIF_STAT)|= 0x4; - else psHu32(GIF_STAT)&= ~0x4; + + if (value & 0x1) + psHu32(GIF_STAT)|= 0x1; + else + psHu32(GIF_STAT)&= ~0x1; + + if (value & 0x4) + psHu32(GIF_STAT)|= 0x4; + else + psHu32(GIF_STAT)&= ~0x4; + break; case GIF_STAT: // stat is readonly @@ -489,153 +496,170 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) DMA_LOG("VIF0dma %lx", value); DmaExec(dmaVIF0, mem, value); break; -//------------------------------------------------------------------ + case 0x10009000: // dma1 - vif1 - chcr DMA_LOG("VIF1dma CHCR %lx", value); DmaExec(dmaVIF1, mem, value); break; + #ifdef PCSX2_DEVBUILD case 0x10009010: // dma1 - vif1 - madr HW_LOG("VIF1dma Madr %lx", value); psHu32(mem) = value;//dma1 madr break; + case 0x10009020: // dma1 - vif1 - qwc HW_LOG("VIF1dma QWC %lx", value); psHu32(mem) = value;//dma1 qwc break; + case 0x10009030: // dma1 - vif1 - tadr HW_LOG("VIF1dma TADR %lx", value); psHu32(mem) = value;//dma1 tadr break; + case 0x10009040: // dma1 - vif1 - asr0 HW_LOG("VIF1dma ASR0 %lx", value); psHu32(mem) = 
value;//dma1 asr0 break; + case 0x10009050: // dma1 - vif1 - asr1 HW_LOG("VIF1dma ASR1 %lx", value); psHu32(mem) = value;//dma1 asr1 break; + case 0x10009080: // dma1 - vif1 - sadr HW_LOG("VIF1dma SADR %lx", value); psHu32(mem) = value;//dma1 sadr break; #endif -//------------------------------------------------------------------ + case 0x1000a000: // dma2 - gif DMA_LOG("0x%8.8x hwWrite32: GSdma %lx", cpuRegs.cycle, value); DmaExec(dmaGIF, mem, value); break; + #ifdef PCSX2_DEVBUILD - case 0x1000a010: - psHu32(mem) = value;//dma2 madr + case 0x1000a010: + psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write DMA2_MADR 32bit at %x with value %x",mem,value); - break; - case 0x1000a020: - psHu32(mem) = value;//dma2 qwc - HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); - break; - case 0x1000a030: - psHu32(mem) = value;//dma2 taddr - HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); - break; - case 0x1000a040: - psHu32(mem) = value;//dma2 asr0 - HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); - break; - case 0x1000a050: - psHu32(mem) = value;//dma2 asr1 - HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); - break; - case 0x1000a080: - psHu32(mem) = value;//dma2 saddr - HW_LOG("Hardware write DMA2_SADDR 32bit at %x with value %x",mem,value); - break; + break; + + case 0x1000a020: + psHu32(mem) = value;//dma2 qwc + HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); + break; + + case 0x1000a030: + psHu32(mem) = value;//dma2 taddr + HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); + break; + + case 0x1000a040: + psHu32(mem) = value;//dma2 asr0 + HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); + break; + + case 0x1000a050: + psHu32(mem) = value;//dma2 asr1 + HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); + break; + + case 0x1000a080: + psHu32(mem) = value;//dma2 saddr + HW_LOG("Hardware write 
DMA2_SADDR 32bit at %x with value %x",mem,value); + break; #endif -//------------------------------------------------------------------ + case 0x1000b000: // dma3 - fromIPU DMA_LOG("IPU0dma %lx", value); DmaExec(dmaIPU0, mem, value); break; -//------------------------------------------------------------------ + #ifdef PCSX2_DEVBUILD case 0x1000b010: psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_MADR 32bit at %x with value %x",mem,value); break; + case 0x1000b020: - psHu32(mem) = value;//dma2 madr + psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_QWC 32bit at %x with value %x",mem,value); - break; + break; + case 0x1000b030: psHu32(mem) = value;//dma2 tadr HW_LOG("Hardware write IPU0DMA_TADR 32bit at %x with value %x",mem,value); break; + case 0x1000b080: psHu32(mem) = value;//dma2 saddr HW_LOG("Hardware write IPU0DMA_SADDR 32bit at %x with value %x",mem,value); break; #endif -//------------------------------------------------------------------ + case 0x1000b400: // dma4 - toIPU DMA_LOG("IPU1dma %lx", value); DmaExec(dmaIPU1, mem, value); break; -//------------------------------------------------------------------ + #ifdef PCSX2_DEVBUILD case 0x1000b410: - psHu32(mem) = value;//dma2 madr + psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_MADR 32bit at %x with value %x",mem,value); - break; + break; + case 0x1000b420: - psHu32(mem) = value;//dma2 madr + psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_QWC 32bit at %x with value %x",mem,value); - break; + break; + case 0x1000b430: psHu32(mem) = value;//dma2 tadr HW_LOG("Hardware write IPU1DMA_TADR 32bit at %x with value %x",mem,value); break; + case 0x1000b480: psHu32(mem) = value;//dma2 saddr HW_LOG("Hardware write IPU1DMA_SADDR 32bit at %x with value %x",mem,value); break; #endif -//------------------------------------------------------------------ case 0x1000c000: // dma5 - sif0 DMA_LOG("SIF0dma %lx", value); - //if (value == 0) psxSu32(0x30) = 0x40000; 
DmaExec(dmaSIF0, mem, value); break; -//------------------------------------------------------------------ + case 0x1000c400: // dma6 - sif1 DMA_LOG("SIF1dma %lx", value); DmaExec(dmaSIF1, mem, value); break; + #ifdef PCSX2_DEVBUILD case 0x1000c420: // dma6 - sif1 - qwc HW_LOG("SIF1dma QWC = %lx", value); psHu32(mem) = value; break; + case 0x1000c430: // dma6 - sif1 - tadr HW_LOG("SIF1dma TADR = %lx", value); psHu32(mem) = value; break; #endif -//------------------------------------------------------------------ case 0x1000c800: // dma7 - sif2 DMA_LOG("SIF2dma %lx", value); DmaExec(dmaSIF2, mem, value); break; -//------------------------------------------------------------------ + case 0x1000d000: // dma8 - fromSPR DMA_LOG("fromSPRdma %lx", value); DmaExec(dmaSPR0, mem, value); break; -//------------------------------------------------------------------ + case 0x1000d400: // dma9 - toSPR DMA_LOG("toSPRdma %lx", value); DmaExec(dmaSPR1, mem, value); break; -//------------------------------------------------------------------ + case 0x1000e000: // DMAC_CTRL HW_LOG("DMAC_CTRL Write 32bit %x", value); psHu32(0xe000) = value; @@ -648,11 +672,10 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) cpuTestDMACInts(); break; -//------------------------------------------------------------------ + case 0x1000f000: // INTC_STAT HW_LOG("INTC_STAT Write 32bit %x", value); psHu32(0xf000)&=~value; - //cpuTestINTCInts(); break; case 0x1000f010: // INTC_MASK @@ -660,7 +683,7 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) psHu32(0xf010) ^= (u16)value; cpuTestINTCInts(); break; -//------------------------------------------------------------------ + case 0x1000f430://MCH_RICM: x:4|SA:12|x:5|SDEV:1|SOP:4|SBC:1|SDEV:5 if ((((value >> 16) & 0xFFF) == 0x21) && (((value >> 6) & 0xF) == 1) && (((psHu32(0xf440) >> 7) & 1) == 0))//INIT & SRP=0 rdram_sdevid = 0; // if SIO repeater is cleared, reset sdevid @@ -670,37 +693,41 @@ __forceinline void __fastcall 
hwWrite32(u32 mem, u32 value) case 0x1000f440://MCH_DRD: psHu32(mem) = value; break; -//------------------------------------------------------------------ + case 0x1000f590: // DMAC_ENABLEW HW_LOG("DMAC_ENABLEW Write 32bit %lx", value); psHu32(0xf590) = value; psHu32(0xf520) = value; return; -//------------------------------------------------------------------ + case 0x1000f200: psHu32(mem) = value; break; + case 0x1000f220: psHu32(mem) |= value; break; + case 0x1000f230: psHu32(mem) &= ~value; break; + case 0x1000f240: if(!(value & 0x100)) psHu32(mem) &= ~0x100; else psHu32(mem) |= 0x100; break; + case 0x1000f260: psHu32(mem) = 0; break; -//------------------------------------------------------------------ + case 0x1000f130: case 0x1000f410: HW_LOG("Unknown Hardware write 32 at %x with value %x (%x)", mem, value, cpuRegs.CP0.n.Status.val); break; -//------------------------------------------------------------------ + default: psHu32(mem) = value; HW_LOG("Unknown Hardware write 32 at %x with value %x (%x)", mem, value, cpuRegs.CP0.n.Status.val); @@ -710,7 +737,7 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) #endif -#if 0 +/* __forceinline void hwWrite64(u32 mem, u64 value) { u32 val32; @@ -846,4 +873,4 @@ __forceinline void hwWrite128(u32 mem, const u64 *value) break; } } -#endif \ No newline at end of file +*/ \ No newline at end of file diff --git a/pcsx2/Hw.h b/pcsx2/Hw.h index aff11a20e3..07f21049ce 100644 --- a/pcsx2/Hw.h +++ b/pcsx2/Hw.h @@ -328,7 +328,7 @@ static __forceinline u8* dmaGetAddr(u32 mem) #ifdef _WIN32 // do manual LUT since IPU/SPR seems to use addrs 0x3000xxxx quite often - // linux doesn't suffer from this because it has better vm support + // linux doesn't suffer from this because it has better vm support if( memLUT[ (p-PS2MEM_BASE)>>12 ].aPFNs == NULL ) { Console::WriteLn("dmaGetAddr: memLUT PFN warning"); return NULL;//p; diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index 97e8510206..bb47768ba8 100644 --- a/pcsx2/SPR.cpp +++ 
b/pcsx2/SPR.cpp @@ -98,7 +98,7 @@ void _SPR0interleave() int qwc = spr0->qwc; int sqwc = psHu32(DMAC_SQWC) & 0xff; int tqwc = (psHu32(DMAC_SQWC) >> 16) & 0xff; - int cycles = 0; + //int cycles = 0; u32 *pMem; if (tqwc == 0) tqwc = qwc; @@ -111,8 +111,8 @@ void _SPR0interleave() spr0->qwc = std::min(tqwc, qwc); qwc -= spr0->qwc; pMem = (u32*)dmaGetAddr(spr0->madr); - if ((psHu32(DMAC_CTRL) & 0xC) == 0xC || // GIF MFIFO - (psHu32(DMAC_CTRL) & 0xC) == 0x8) // VIF1 MFIFO + if ((((psHu32(DMAC_CTRL) & 0xC) == 0xC) || // GIF MFIFO + (psHu32(DMAC_CTRL) & 0xC) == 0x8)) // VIF1 MFIFO { hwMFIFOWrite(spr0->madr, (u8*)&PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); mfifotransferred += spr0->qwc; @@ -123,7 +123,7 @@ void _SPR0interleave() TestClearVUs(spr0->madr, spr0->qwc << 2); memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); } - cycles += tqwc * BIAS; + //cycles += tqwc * BIAS; spr0->sadr += spr0->qwc * 16; spr0->madr += (sqwc + spr0->qwc) * 16; //qwc-= sqwc; } @@ -153,7 +153,7 @@ static __forceinline void _dmaSPR0() int cycles = 0; u32 *ptag; int id; - int done = 0; + bool done = FALSE; if (spr0->qwc > 0) { @@ -169,7 +169,7 @@ static __forceinline void _dmaSPR0() spr0->chcr = (spr0->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); //Transfer upper part of tag to CHCR bits 31-15 - id = (ptag[0] >> 28) & 0x7; //ID for DmaChain copied from bit 28 of the tag + id = (ptag[0] >> 28) & 0x7; //ID for DmaChain copied from bit 28 of the tag spr0->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag spr0->madr = ptag[1]; //MADR = ADDR field @@ -188,27 +188,28 @@ static __forceinline void _dmaSPR0() break; case 1: // CNT - Transfer QWC following the tag. 
- done = 0; + done = FALSE; break; case 7: // End - Transfer QWC following the tag - done = 1; //End Transfer + done = TRUE; break; } SPR0chain(); if (spr0->chcr & 0x80 && ptag[0] >> 31) //Check TIE bit of CHCR and IRQ bit of tag { //Console::WriteLn("SPR0 TIE"); - done = 1; + done = TRUE; spr0->qwc = 0; } + + spr0finished = (done) ? 1 : 0; - spr0finished = done; - if (done == 0) + if (!done) { ptag = (u32*) & PS2MEM_SCRATCH[spr0->sadr & 0x3fff]; //Set memory pointer to SADR - spr0->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag - CPU_INT(8, spr0->qwc / BIAS); + //spr0->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag + CPU_INT(8, ((u16)ptag[0]) / BIAS); //spr0->qwc / BIAS); spr0->qwc = 0; return; } @@ -219,9 +220,6 @@ static __forceinline void _dmaSPR0() { _SPR0interleave(); } - - - } void SPRFROMinterrupt() @@ -253,8 +251,6 @@ void SPRFROMinterrupt() void dmaSPR0() // fromSPR { - - SPR_LOG("dmaSPR0 chcr = %lx, madr = %lx, qwc = %lx, sadr = %lx", spr0->chcr, spr0->madr, spr0->qwc, spr0->sadr); @@ -303,7 +299,7 @@ void _SPR1interleave() int qwc = spr1->qwc; int sqwc = psHu32(DMAC_SQWC) & 0xff; int tqwc = (psHu32(DMAC_SQWC) >> 16) & 0xff; - int cycles = 0; + //int cycles = 0; u32 *pMem; if (tqwc == 0) tqwc = qwc; @@ -317,7 +313,7 @@ void _SPR1interleave() pMem = (u32*)dmaGetAddr(spr1->madr); memcpy_fast(&PS2MEM_SCRATCH[spr1->sadr & 0x3fff], (u8*)pMem, spr1->qwc << 4); spr1->sadr += spr1->qwc * 16; - cycles += spr1->qwc * BIAS; + //cycles += spr1->qwc * BIAS; spr1->madr += (sqwc + spr1->qwc) * 16; //qwc-= sqwc; } @@ -339,7 +335,8 @@ void _dmaSPR1() // toSPR work function { int cycles = 0; u32 *ptag; - int id, done = 0; + int id; + bool done = FALSE; if (spr1->qwc > 0) { @@ -356,8 +353,8 @@ void _dmaSPR1() // toSPR work function Console::WriteLn("SPR1 Tag BUSERR"); spr1->chcr = (spr1->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); //Transfer upper part of tag to CHCR bits 31-15 psHu32(DMAC_STAT) |= 1 << 15; //If yes, set BEIS (BUSERR) in DMAC_STAT register 
- done = 1; - spr1finished = done; + done = TRUE; + spr1finished = (done) ? 1: 0; return; } spr1->chcr = (spr1->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); //Transfer upper part of tag to CHCR bits 31-15 @@ -376,7 +373,7 @@ void _dmaSPR1() // toSPR work function SPR_LOG("spr1 dmaChain %8.8x_%8.8x size=%d, id=%d, addr=%lx", ptag[1], ptag[0], spr1->qwc, id, spr1->madr); - done = hwDmacSrcChain(spr1, id); + done = (hwDmacSrcChain(spr1, id) == 1); SPR1chain(); //Transfers the data set by the switch if (spr1->chcr & 0x80 && ptag[0] >> 31) //Check TIE bit of CHCR and IRQ bit of tag @@ -385,15 +382,15 @@ void _dmaSPR1() // toSPR work function //Console::WriteLn("SPR1 TIE"); spr1->qwc = 0; - done = 1; + done = TRUE; } spr1finished = done; - if (done == 0) + if (!done) { ptag = (u32*)dmaGetAddr(spr1->tadr); //Set memory pointer to TADR - spr1->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag - CPU_INT(9, spr1->qwc / BIAS); + //spr1->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag + CPU_INT(9, (((u16)ptag[0]) / BIAS));// spr1->qwc / BIAS); spr1->qwc = 0; } } @@ -416,7 +413,6 @@ void dmaSPR1() // toSPR u32 *ptag; ptag = (u32*)dmaGetAddr(spr1->tadr); //Set memory pointer to TADR CPU_INT(9, (ptag[0] & 0xffff) / BIAS); - //spr1->qwc = 0; return; } // COMPLETE HACK!!! For now at least.. 
FFX Videos dont rely on interrupts or reading DMA values diff --git a/pcsx2/Sif.cpp b/pcsx2/Sif.cpp index 78c006c58d..796be3f9c7 100644 --- a/pcsx2/Sif.cpp +++ b/pcsx2/Sif.cpp @@ -40,26 +40,26 @@ DMACh *sif2ch; struct _sif0 { u32 fifoData[FIFO_SIF0_W]; - int fifoReadPos; - int fifoWritePos; - int fifoSize; - int chain; - int end; - int tagMode; - int counter; + s32 fifoReadPos; + s32 fifoWritePos; + s32 fifoSize; + s32 chain; + s32 end; + s32 tagMode; + s32 counter; struct sifData sifData; }; struct _sif1 { u32 fifoData[FIFO_SIF1_W]; - int fifoReadPos; - int fifoWritePos; - int fifoSize; - int chain; - int end; - int tagMode; - int counter; + s32 fifoReadPos; + s32 fifoWritePos; + s32 fifoSize; + s32 chain; + s32 end; + s32 tagMode; + s32 counter; }; static _sif0 sif0; @@ -85,7 +85,6 @@ static __forceinline void SIF0write(u32 *from, int words) memcpy(&sif0.fifoData[0], &from[wP0], wP1 << 2); sif0.fifoWritePos = (sif0.fifoWritePos + words) & (FIFO_SIF0_W - 1); - sif0.fifoSize += words; SIF_LOG(" SIF0 + %d = %d (pos=%d)", words, sif0.fifoSize, sif0.fifoWritePos); } @@ -132,7 +131,7 @@ static __forceinline void SIF1read(u32 *to, int words) __forceinline void SIF0Dma() { u32 *ptag; - int notDone = TRUE; + bool done = FALSE; int cycles = 0, psxCycles = 0; SIF_LOG("SIF0 DMA start..."); @@ -157,7 +156,7 @@ __forceinline void SIF0Dma() PSX_INT(IopEvt_SIF0, psxCycles); sif0.sifData.data = 0; - notDone = FALSE; + done = TRUE; } else // Chain mode { @@ -171,13 +170,13 @@ __forceinline void SIF0Dma() HW_DMA9_MADR = sif0.sifData.data & 0xFFFFFF; HW_DMA9_TADR += 16; ///HW_DMA9_MADR + 16 + sif0.sifData.words << 2; sif0.counter = sif0.sifData.words & 0xFFFFFF; - notDone = TRUE; SIF_LOG(" SIF0 Tag: madr=%lx, tadr=%lx, counter=%lx (%08X_%08X)", HW_DMA9_MADR, HW_DMA9_TADR, sif0.counter, sif0.sifData.words, sif0.sifData.data); if (sif0.sifData.data & 0x40000000) SIF_LOG(" END"); else SIF_LOG(" CNT %08X, %08X", sif0.sifData.data, sif0.sifData.words); + done = FALSE; } } else // There's 
some data ready to transfer into the fifo.. @@ -220,23 +219,16 @@ __forceinline void SIF0Dma() if (sif0dma->qwc == 0) { - if ((sif0dma->chcr & 0x80000080) == 0x80000080) // Stop on tag IRQ + if (((sif0dma->chcr & 0x80000080) == 0x80000080) || (sif0.end)) // Stop on tag IRQ or END { - // Tag interrupt - SIF_LOG(" EE SIF interrupt"); + if (sif0.end) + SIF_LOG(" EE SIF end"); + else + SIF_LOG(" EE SIF interrupt"); eesifbusy[0] = 0; CPU_INT(5, cycles*BIAS); - notDone = FALSE; - } - else if (sif0.end) // Stop on tag END - { - // End tag. - SIF_LOG(" EE SIF end"); - - eesifbusy[0] = 0; - CPU_INT(5, cycles*BIAS); - notDone = FALSE; + done = TRUE; } else if (sif0.fifoSize >= 4) // Read a tag { @@ -252,22 +244,22 @@ __forceinline void SIF0Dma() if ((psHu32(DMAC_CTRL) & 0x30) != 0 && ((tag[0] >> 28)&3) == 0) psHu32(DMAC_STADR) = sif0dma->madr + (sif0dma->qwc * 16); - notDone = TRUE; sif0.chain = 1; if (tag[0] & 0x40000000) sif0.end = 1; + done = FALSE; } } } } - while (notDone); + while (!done); } __forceinline void SIF1Dma() { int id; u32 *ptag; - bool notDone = true; + bool done = FALSE; int cycles = 0, psxCycles = 0; do { @@ -284,7 +276,7 @@ __forceinline void SIF1Dma() // Stop & signal interrupts on EE SIF_LOG("EE SIF1 End %x", sif1.end); eesifbusy[1] = 0; - notDone = FALSE; + done = TRUE; CPU_INT(6, cycles*BIAS); sif1.chain = 0; sif1.end = 0; @@ -292,7 +284,7 @@ __forceinline void SIF1Dma() else // Chain mode { // Process DMA tag at sif1dma->tadr - notDone = TRUE; + done = FALSE; _dmaGetAddr(sif1dma, ptag, sif1dma->tadr, 6); sif1dma->chcr = (sif1dma->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); // Copy the tag sif1dma->qwc = (u16)ptag[0]; @@ -390,23 +382,17 @@ __forceinline void SIF1Dma() if (sif1.counter <= 0) { - if (sif1.tagMode & 0x80) // Stop on tag IRQ + if ((sif1.tagMode & 0x80) || (sif1.tagMode & 0x40)) // Stop on tag IRQ or END { - // Tag interrupt - SIF_LOG(" IOP SIF interrupt"); + if (sif1.tagMode & 0x40) + SIF_LOG(" IOP SIF end"); + else + SIF_LOG(" IOP SIF 
interrupt"); + iopsifbusy[1] = 0; PSX_INT(IopEvt_SIF1, psxCycles); sif1.tagMode = 0; - notDone = FALSE; - } - else if (sif1.tagMode & 0x40) // Stop on tag END - { - // End tag. - SIF_LOG(" IOP SIF end"); - iopsifbusy[1] = 0; - PSX_INT(IopEvt_SIF1, psxCycles); - sif1.tagMode = 0; - notDone = FALSE; + done = TRUE; } else if (sif1.fifoSize >= 4) // Read a tag { @@ -416,12 +402,12 @@ __forceinline void SIF1Dma() HW_DMA10_MADR = d.data & 0xffffff; sif1.counter = d.words; sif1.tagMode = (d.data >> 24) & 0xFF; - notDone = TRUE; + done = FALSE; } } } } - while (notDone); + while (!done); } __forceinline void sif0Interrupt() @@ -460,7 +446,7 @@ __forceinline void dmaSIF0() psHu32(0x1000F240) |= 0x2000; eesifbusy[0] = 1; - if (eesifbusy[0] == 1 && iopsifbusy[0] == 1) + if (iopsifbusy[0] == 1) { FreezeXMMRegs(1); hwIntcIrq(INTC_SBUS); @@ -483,7 +469,7 @@ __forceinline void dmaSIF1() psHu32(0x1000F240) |= 0x4000; eesifbusy[1] = 1; - if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) + if (iopsifbusy[1] == 1) { FreezeXMMRegs(1); SIF1Dma(); diff --git a/pcsx2/Sif.h b/pcsx2/Sif.h index 9436a1e461..5812492451 100644 --- a/pcsx2/Sif.h +++ b/pcsx2/Sif.h @@ -19,11 +19,12 @@ #ifndef __SIF_H__ #define __SIF_H__ -struct sifData{ - int data, - words, - count, - addr; +struct sifData +{ + s32 data; + s32 words; + s32 count; + s32 addr; }; extern DMACh *sif0ch; diff --git a/pcsx2/Sifcmd.h b/pcsx2/Sifcmd.h index aef3a68af7..7a32c9470e 100644 --- a/pcsx2/Sifcmd.h +++ b/pcsx2/Sifcmd.h @@ -25,46 +25,49 @@ struct t_sif_cmd_header { - u32 size; - void *dest; - int command; - u32 unknown; + u32 size; + void *dest; + s32 command; + u32 unknown; }; struct t_sif_dma_transfer { - void *src, - *dest; - int size; - int attr; + void *src; + void *dest; + s32 size; + s32 attr; }; struct t_sif_handler { - void (*handler) ( void *a, void *b); - void *buff; + void (*handler)(void *a, void *b); + void *buff; }; #define SYSTEM_CMD_CHANGE_SADDR 0x80000000 #define SYSTEM_CMD_INIT_CMD 0x80000002 -struct t_sif_saddr{ 
+struct t_sif_saddr +{ struct t_sif_cmd_header hdr; //+00 - void *newaddr; //+10 + void *newaddr; //+10 }; //=14 #define SYSTEM_CMD_SET_SREG 0x80000001 -struct t_sif_sreg{ +struct t_sif_sreg +{ struct t_sif_cmd_header hdr; //+00 - int index; //+10 - unsigned int value; //+14 + s32 index; //+10 + u32 value; //+14 }; //=18 #define SYSTEM_CMD_RESET 0x80000003 -struct t_sif_reset{ +struct t_sif_reset +{ struct t_sif_cmd_header hdr; //+00 - int size, //+10 - flag; //+14 - char data[80]; //+18 + s32 size; //+10 + s32 flag; //+14 + char data[80]; //+18 }; //=68 /* end of sifcmd.h */ @@ -73,119 +76,119 @@ struct t_sif_reset{ struct t_sif_rpc_rend { - struct t_sif_cmd_header sifcmd; - int rec_id; /* 04 */ - void *pkt_addr; /* 05 */ - int rpc_id; /* 06 */ - - struct t_rpc_client_data *client; /* 7 */ - u32 command; /* 8 */ - struct t_rpc_server_data *server; /* 9 */ - void *buff, /* 10 */ - *buff2; /* 11 */ + struct t_sif_cmd_header sifcmd; + s32 rec_id; /* 04 */ + void *pkt_addr; /* 05 */ + s32 rpc_id; /* 06 */ + + struct t_rpc_client_data *client; /* 7 */ + u32 command; /* 8 */ + struct t_rpc_server_data *server; /* 9 */ + void *buff; /* 10 */ + void *buff2; /* 11 */ }; struct t_sif_rpc_other_data { - struct t_sif_cmd_header sifcmd; - int rec_id; /* 04 */ - void *pkt_addr; /* 05 */ - int rpc_id; /* 06 */ - - struct t_rpc_receive_data *receive; /* 07 */ - void *src; /* 08 */ - void *dest; /* 09 */ - int size; /* 10 */ + struct t_sif_cmd_header sifcmd; + s32 rec_id; /* 04 */ + void *pkt_addr; /* 05 */ + s32 rpc_id; /* 06 */ + + struct t_rpc_receive_data *receive; /* 07 */ + void *src; /* 08 */ + void *dest; /* 09 */ + s32 size; /* 10 */ }; struct t_sif_rpc_bind { - struct t_sif_cmd_header sifcmd; - int rec_id; /* 04 */ - void *pkt_addr; /* 05 */ - int rpc_id; /* 06 */ - struct t_rpc_client_data *client; /* 07 */ - int rpc_number; /* 08 */ + struct t_sif_cmd_header sifcmd; + s32 rec_id; /* 04 */ + void *pkt_addr; /* 05 */ + s32 rpc_id; /* 06 */ + struct t_rpc_client_data 
*client; /* 07 */ + s32 rpc_number; /* 08 */ }; struct t_sif_rpc_call { - struct t_sif_cmd_header sifcmd; - int rec_id; /* 04 */ - void *pkt_addr; /* 05 */ - int rpc_id; /* 06 */ - struct t_rpc_client_data *client; /* 07 */ - int rpc_number; /* 08 */ - int send_size; /* 09 */ - void *receive; /* 10 */ - int rec_size; /* 11 */ - int has_async_ef; /* 12 */ - struct t_rpc_server_data *server; /* 13 */ + struct t_sif_cmd_header sifcmd; + s32 rec_id; /* 04 */ + void *pkt_addr; /* 05 */ + s32 rpc_id; /* 06 */ + struct t_rpc_client_data *client; /* 07 */ + s32 rpc_number; /* 08 */ + s32 send_size; /* 09 */ + void *receive; /* 10 */ + s32 rec_size; /* 11 */ + s32 has_async_ef; /* 12 */ + struct t_rpc_server_data *server; /* 13 */ }; struct t_rpc_server_data { - int command; /* 04 00 */ + s32 command; /* 04 00 */ - void * (*func)(u32, void *, int); /* 05 01 */ - void *buff; /* 06 02 */ - int size; /* 07 03 */ + void *(*func)(u32, void *, int); /* 05 01 */ + void *buff; /* 06 02 */ + s32 size; /* 07 03 */ - void * (*func2)(u32, void *, int); /* 08 04 */ - void *buff2; /* 09 05 */ - int size2; /* 10 06 */ + void *(*func2)(u32, void *, int); /* 08 04 */ + void *buff2; /* 09 05 */ + s32 size2; /* 10 06 */ - struct t_rpc_client_data *client; /* 11 07 */ - void *pkt_addr; /* 12 08 */ - int rpc_number; /* 13 09 */ + struct t_rpc_client_data *client; /* 11 07 */ + void *pkt_addr; /* 12 08 */ + s32 rpc_number; /* 13 09 */ - void *receive; /* 14 10 */ - int rec_size; /* 15 11 */ - int has_async_ef; /* 16 12 */ - int rec_id; /* 17 13 */ + void *receive; /* 14 10 */ + s32 rec_size; /* 15 11 */ + s32 has_async_ef; /* 16 12 */ + s32 rec_id; /* 17 13 */ - struct t_rpc_server_data *link; /* 18 14 */ - struct r_rpc_server_data *next; /* 19 15 */ - struct t_rpc_data_queue *queued_object; /* 20 16 */ + struct t_rpc_server_data *link; /* 18 14 */ + struct r_rpc_server_data *next; /* 19 15 */ + struct t_rpc_data_queue *queued_object; /* 20 16 */ }; struct t_rpc_header { - void *pkt_addr; /* 04 
00 */ - u32 rpc_id; /* 05 01 */ - int sema_id; /* 06 02 */ - u32 mode; /* 07 03 */ + void *pkt_addr; /* 04 00 */ + u32 rpc_id; /* 05 01 */ + s32 sema_id; /* 06 02 */ + u32 mode; /* 07 03 */ }; struct t_rpc_client_data { - struct t_rpc_header hdr; - u32 command; /* 04 08 */ - void *buff, /* 05 09 */ - *buff2; /* 06 10 */ - void (*end_function) ( void *); /* 07 11 */ - void *end_param; /* 08 12*/ - struct t_rpc_server_data *server; /* 09 13 */ + struct t_rpc_header hdr; + u32 command; /* 04 08 */ + void *buff; /* 05 09 */ + void *buff2; /* 06 10 */ + void (*end_function)(void *); /* 07 11 */ + void *end_param; /* 08 12*/ + struct t_rpc_server_data *server; /* 09 13 */ }; struct t_rpc_receive_data { - struct t_rpc_header hdr; - void *src, /* 04 */ - *dest; /* 05 */ - int size; /* 06 */ + struct t_rpc_header hdr; + void *src; /* 04 */ + void *dest; /* 05 */ + s32 size; /* 06 */ }; struct t_rpc_data_queue { - int thread_id, /* 00 */ - active; /* 01 */ - struct t_rpc_server_data *svdata_ref, /* 02 */ - *start, /* 03 */ - *end; /* 04 */ - struct t_rpc_data_queue *next; /* 05 */ + s32 thread_id; /* 00 */ + s32 active; /* 01 */ + struct t_rpc_server_data *svdata_ref; /* 02 */ + struct t_rpc_server_data *start; /* 03 */ + struct t_rpc_server_data *end; /* 04 */ + struct t_rpc_data_queue *next; /* 05 */ }; /* end of sifrpc.h */ diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 806e59f87a..7cc43a2f3f 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -446,13 +446,9 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (vif->cl == vifRegs->cycle.wl) { if (vifRegs->cycle.cl != vifRegs->cycle.wl) - { dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; - } else - { dest += destinc; - } vif->cl = 0; } else @@ -539,7 +535,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } #endif - if (vifRegs->cycle.cl == 0 || vifRegs->cycle.wl == 0 || (vifRegs->cycle.cl == vifRegs->cycle.wl && 
!(vifRegs->code&0x10000000))) + if ((vifRegs->cycle.cl == 0) || (vifRegs->cycle.wl == 0) || ((vifRegs->cycle.cl == vifRegs->cycle.wl) && !(vifRegs->code & 0x10000000))) { oldcycle = *(u32*) & vifRegs->cycle; vifRegs->cycle.cl = vifRegs->cycle.wl = 1; From a0146d9db1d2b72810cdfa5889e4a560ad055bec Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 1 Apr 2009 12:28:21 +0000 Subject: [PATCH 04/21] How that compiled with a space missing, I may never know... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@880 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Sifcmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Sifcmd.h b/pcsx2/Sifcmd.h index 7a32c9470e..7dd58a95b1 100644 --- a/pcsx2/Sifcmd.h +++ b/pcsx2/Sifcmd.h @@ -58,7 +58,7 @@ struct t_sif_sreg { struct t_sif_cmd_header hdr; //+00 s32 index; //+10 - u32value; //+14 + u32 value; //+14 }; //=18 #define SYSTEM_CMD_RESET 0x80000003 From 4c8cf52c9452c27c12d03814f4ecd97c854d85ec Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 2 Apr 2009 13:42:30 +0000 Subject: [PATCH 05/21] Implemented some more vtlb optimizations: Regalloc should be working a bit better now, and removed some unneeded code on the LWL/SDL/etc interpreter callbacks. Emitter: Added Rm/RmOffset forms for AND32 - Untested. I'm pretty sure they're valid instructions but I could be wrong. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@883 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Counters.h | 2 - pcsx2/R5900OpcodeImpl.cpp | 14 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 4 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 549 ++++++-------------------- pcsx2/x86/ix86/ix86.inl | 16 + pcsx2/x86/ix86/ix86_macros.h | 2 + 6 files changed, 150 insertions(+), 437 deletions(-) diff --git a/pcsx2/Counters.h b/pcsx2/Counters.h index f4c70776ee..6e20dda0c2 100644 --- a/pcsx2/Counters.h +++ b/pcsx2/Counters.h @@ -103,8 +103,6 @@ struct SyncCounter #define SCANLINES_VBLANK1_NTSC 19 // scanlines used for vblank1 (even interlace) #define SCANLINES_VBLANK2_NTSC 20 // scanlines used for vblank2 (odd interlace) -#define HSYNC_ERROR_NTSC ((s32)VSYNC_NTSC - (s32)(((HRENDER_TIME_NTSC+HBLANK_TIME_NTSC) * SCANLINES_TOTAL_NTSC)/2) ) - //------------------------------------------------------------------ // PAL Timing Information!!! (some scanline info is guessed) //------------------------------------------------------------------ diff --git a/pcsx2/R5900OpcodeImpl.cpp b/pcsx2/R5900OpcodeImpl.cpp index f9b2e194ac..800e907dff 100644 --- a/pcsx2/R5900OpcodeImpl.cpp +++ b/pcsx2/R5900OpcodeImpl.cpp @@ -574,11 +574,10 @@ void LDR() void LQ() { + // MIPS Note: LQ and SQ are special and "silently" align memory addresses, thus + // an address error due to unaligned access isn't possible like it is on other loads/stores. + u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + _Imm_; - - if( addr & 15 ) - throw R5900Exception::AddressError( addr, false ); - memRead128(addr & ~0xf, gpr_GetWritePtr(_Rt_)); } @@ -704,11 +703,10 @@ void SDR() void SQ() { - u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + _Imm_; - - if( addr & 15 ) - throw R5900Exception::AddressError( addr, true ); + // MIPS Note: LQ and SQ are special and "silently" align memory addresses, thus + // an address error due to unaligned access isn't possible like it is on other loads/stores. 
+ u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + _Imm_; memWrite128(addr & ~0xf, &cpuRegs.GPR.r[_Rt_].UD[0]); } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index b1f2782703..b38fa22580 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -323,7 +323,9 @@ u32* _eeGetConstReg(int reg) void _eeMoveGPRtoR(x86IntRegType to, int fromgpr) { - if( GPR_IS_CONST1(fromgpr) ) + if( fromgpr == 0 ) + XOR32RtoR( to, to ); // zero register should use xor, thanks --air + else if( GPR_IS_CONST1(fromgpr) ) MOV32ItoR( to, g_cpuConstRegs[fromgpr].UL[0] ); else { int mmreg; diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index e3e3664a5a..800682545a 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -2070,6 +2070,8 @@ void SetFastMemory(int bSetFast) // nothing } +////////////////////////////////////////////////////////////////////////////////////////// +// void recLoad64( u32 bits, bool sign ) { jASSUME( bits == 64 || bits == 128 ); @@ -2096,23 +2098,24 @@ void recLoad64( u32 bits, bool sign ) } else { - _deleteEEreg(_Rs_, 1); // Load ECX with the source memory address that we're reading from. 
- MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); + _eeMoveGPRtoR(ECX, _Rs_); + if ( _Imm_ != 0 ) + ADD32ItoR( ECX, _Imm_ ); + if( bits == 128 ) // force 16 byte alignment on 128 bit reads + AND32I8toR(ECX,0xF0); + _eeOnLoadWrite(_Rt_); EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension _deleteEEreg(_Rt_, 0); - if ( _Imm_ != 0 ) - ADD32ItoR( ECX, _Imm_ ); - - if( bits == 128 ) // force 16 byte alignment on 128 bit reads - AND32I8toR(ECX,0xF0); vtlb_DynGenRead64(bits); } } -void recLoad32(u32 bits,bool sign) +////////////////////////////////////////////////////////////////////////////////////////// +// +void recLoad32( u32 bits, bool sign ) { jASSUME( bits <= 32 ); @@ -2131,14 +2134,13 @@ void recLoad32(u32 bits,bool sign) } else { - _deleteEEreg(_Rs_, 1); // Load ECX with the source memory address that we're reading from. - MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); + _eeMoveGPRtoR(ECX, _Rs_); if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ ); - + + _eeOnLoadWrite(_Rt_); + _deleteEEreg(_Rt_, 0); vtlb_DynGenRead32(bits, sign); } @@ -2155,337 +2157,31 @@ void recLoad32(u32 bits,bool sign) } } -//////////////////////////////////////////////////// -void recLB( void ) +////////////////////////////////////////////////////////////////////////////////////////// +// + +// edxAlreadyAssigned - set to true if edx already holds the value being written (used by SWL/SWR) +void recStore(u32 sz, bool edxAlreadyAssigned=false) { - recLoad32(8,true); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - PUSH32I( (int)&dummyValue[0] ); - PUSH32R( EAX ); - - CALLFunc( (int)memRead8 ); - ADD32ItoR( ESP, 8 ); - if ( _Rt_ ) - { - u8* linkEnd; - TEST32RtoR( EAX, EAX ); - linkEnd = JNZ8( 0 ); - MOV32MtoR( EAX, (int)&dummyValue[0] ); - MOVSX32R8toR( EAX, EAX ); - CDQ( ); - MOV32RtoM( 
(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); - x86SetJ8( linkEnd ); - } - */ -} - -//////////////////////////////////////////////////// -void recLBU( void ) -{ - recLoad32(8,false); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - PUSH32I( (int)&dummyValue[0] ); - PUSH32R( EAX ); - - CALLFunc( (int)memRead8 ); - ADD32ItoR( ESP, 8 ); - if ( _Rt_ ) - { - u8* linkEnd; - TEST32RtoR( EAX, EAX ); - linkEnd = JNZ8( 0 ); - MOV32MtoR( EAX, (int)&dummyValue[0] ); - MOVZX32R8toR( EAX, EAX ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32ItoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], 0 ); - x86SetJ8( linkEnd ); - } - */ -} - -//////////////////////////////////////////////////// -void recLH( void ) -{ - recLoad32(16,true); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - PUSH32I( (int)&dummyValue[0] ); - PUSH32R( EAX ); - - CALLFunc( (int)memRead16 ); - ADD32ItoR( ESP, 8 ); - if ( _Rt_ ) - { - u8* linkEnd; - TEST32RtoR( EAX, EAX ); - linkEnd = JNZ8( 0 ); - MOV32MtoR( EAX, (int)&dummyValue[0]); - MOVSX32R16toR( EAX, EAX ); - CDQ( ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); - x86SetJ8( linkEnd ); - } - */ -} - -//////////////////////////////////////////////////// -void recLHU( void ) -{ - recLoad32(16,false); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - PUSH32I( (int)&dummyValue[0] ); - PUSH32R( EAX ); - CALLFunc( (int)memRead16 ); - ADD32ItoR( ESP, 8 ); - if ( _Rt_ ) - { - u8* linkEnd; - TEST32RtoR( EAX, 
EAX ); - linkEnd = JNZ8( 0 ); - MOV32MtoR( EAX, (int)&dummyValue[0] ); - MOVZX32R16toR( EAX, EAX ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32ItoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], 0 ); - x86SetJ8( linkEnd ); - }*/ -} - -//////////////////////////////////////////////////// -void recLW( void ) -{ - recLoad32(32,true); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - - PUSH32I( (int)&dummyValue[0]); - PUSH32R( EAX ); - - CALLFunc( (int)memRead32 ); - ADD32ItoR( ESP, 8 ); - - if ( _Rt_ ) - { - u8* linkEnd; - TEST32RtoR( EAX, EAX ); - linkEnd = JNZ8( 0 ); - MOV32MtoR( EAX, (int)&dummyValue[0]); - CDQ( ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); - x86SetJ8( linkEnd ); - }*/ -} - -//////////////////////////////////////////////////// -void recLWU( void ) -{ - recLoad32(32,false); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - - PUSH32I( (int)&dummyValue[0]); - PUSH32R( EAX ); - CALLFunc( (int)memRead32 ); - ADD32ItoR( ESP, 8 ); - if ( _Rt_ ) - { - u8* linkEnd; - TEST32RtoR( EAX, EAX ); - linkEnd = JNZ8( 0 ); - MOV32MtoR( EAX, (int)&dummyValue[0]); - MOV32RtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32ItoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], 0 ); - x86SetJ8( linkEnd ); - } - */ -} - -//////////////////////////////////////////////////// -void recLWL( void ) -{ - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - _deleteEEreg(_Rt_, 0); - MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); - CALLFunc( (int)LWL ); -} - -//////////////////////////////////////////////////// -void recLWR( void ) -{ - iFlushCall(FLUSH_EVERYTHING); - MOV32ItoM( 
(int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); - CALLFunc( (int)LWR ); -} - -//////////////////////////////////////////////////// -extern void MOV64RmtoR( x86IntRegType to, x86IntRegType from ); - -void recLD( void ) -{ - recLoad64(64,false); - /* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - - if ( _Rt_ ) - { - PUSH32I( (int)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - } - else - { - PUSH32I( (int)&dummyValue[0] ); - } - PUSH32R( EAX ); - CALLFunc( (int)memRead64 ); - ADD32ItoR( ESP, 8 ); - */ -} - -//////////////////////////////////////////////////// -void recLDL( void ) -{ - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension - _deleteEEreg(_Rt_, 0); - MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); - CALLFunc( (int)LDL ); -} - -//////////////////////////////////////////////////// -void recLDR( void ) -{ - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension - _deleteEEreg(_Rt_, 0); - MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); - CALLFunc( (int)LDR ); -} - -//////////////////////////////////////////////////// -void recLQ( void ) -{ - recLoad64(128,false); -/* - _deleteEEreg(_Rs_, 1); - _eeOnLoadWrite(_Rt_); - EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension - _deleteEEreg(_Rt_, 0); - - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_); - } - AND32ItoR( EAX, ~0xf ); - - if ( _Rt_ ) - { - PUSH32I( (int)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - } - else - { - PUSH32I( (int)&dummyValue[0] ); - } - PUSH32R( EAX ); - CALLFunc( (int)memRead128 ); - ADD32ItoR( ESP, 8 ); - */ -} - -void recStore(u32 sz) -{ - //no int 3? 
i love to get my hands dirty ;p - Raz - //write8(0xCC); - - _deleteEEreg(_Rt_, 1); - // Performance note: Const prop for the store address is good, always. // Constprop for the value being stored is not really worthwhile (better to use register // allocation -- simpler code and just as fast) - // Load EDX first with the value being written, or the address of the value // being written (64/128 bit modes). TODO: use register allocation, if the // value is allocated to a register. - if (sz<64) + if( !edxAlreadyAssigned ) { - if (_Rt_) - MOV32MtoR(EDX,(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); - else - XOR32RtoR(EDX,EDX); - } - else if (sz==128 || sz==64) - { - MOV32ItoR(EDX,(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); + if( sz < 64 ) + { + _eeMoveGPRtoR(EDX, _Rt_); + } + else if (sz==128 || sz==64) + { + _deleteEEreg(_Rt_, 1); // flush register to mem + MOV32ItoR(EDX,(int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); + } } // Load ECX with the destination address, or issue a direct optimized write @@ -2499,11 +2195,10 @@ void recStore(u32 sz) } else { - _deleteEEreg(_Rs_, 1); - MOV32MtoR( ECX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); + _eeMoveGPRtoR(ECX, _Rs_); + if ( _Imm_ != 0 ) ADD32ItoR(ECX, _Imm_); - if (sz==128) AND32I8toR(ECX,0xF0); @@ -2511,74 +2206,94 @@ void recStore(u32 sz) } } +////////////////////////////////////////////////////////////////////////////////////////// +// +void recLB( void ) { recLoad32(8,true); } +void recLBU( void ) { recLoad32(8,false); } +void recLH( void ) { recLoad32(16,true); } +void recLHU( void ) { recLoad32(16,false); } +void recLW( void ) { recLoad32(32,true); } +void recLWU( void ) { recLoad32(32,false); } +void recLD( void ) { recLoad64(64,false); } +void recLQ( void ) { recLoad64(128,false); } + +void recSB( void ) { recStore(8); } +void recSH( void ) { recStore(16); } +void recSW( void ) { recStore(32); } +void recSQ( void ) { recStore(128); } +void recSD( void ) { recStore(64); } + 
+////////////////////////////////////////////////////////////////////////////////////////// +// Non-recompiled Implementations Start Here --> +// (LWL/SWL, LWR/SWR, etc) + //////////////////////////////////////////////////// -void recSB( void ) +void recLWL( void ) { - recStore(8); - /* _deleteEEreg(_Rs_, 1); + _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 1); - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_); - } - PUSH32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - PUSH32R( EAX ); - CALLFunc( (int)memWrite8 ); - ADD32ItoR( ESP, 8 ); - */ + + MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); + CALLFunc( (int)LWL ); } //////////////////////////////////////////////////// -void recSH( void ) +void recLWR( void ) { - recStore(16); - /* _deleteEEreg(_Rs_, 1); + _eeOnLoadWrite(_Rt_); _deleteEEreg(_Rt_, 1); - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - PUSH32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - PUSH32R( EAX ); - CALLFunc( (int)memWrite16 ); - ADD32ItoR( ESP, 8 ); - */ + MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); + CALLFunc( (int)LWR ); } -//////////////////////////////////////////////////// -void recSW( void ) -{ - recStore(32); - /* - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); +static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0x00000000 }; +static const u32 SWR_MASK[4] = { 0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff }; - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - - PUSH32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - PUSH32R( EAX ); - CALLFunc( (int)memWrite32 ); - ADD32ItoR( ESP, 8 ); - */ -} +static const u8 SWR_SHIFT[4] = { 0, 8, 16, 24 }; +static const u8 SWL_SHIFT[4] = { 24, 16, 8, 0 }; //////////////////////////////////////////////////// void recSWL( void ) { - 
_deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); - CALLFunc( (int)SWL ); + // Perform a translated memory read, followed by a translated memory write + // of the "merged" result. + + // NOTE: Code incomplete. I'll fix/finish it soon. --air + if( 0 ) //GPR_IS_CONST1( _Rs_ ) ) + { + _eeOnLoadWrite(_Rt_); + //_deleteEEreg(_Rt_, 0); + + u32 addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; + u32 shift = addr & 3; + vtlb_DynGenRead32_Const( 32, false, addr & 3 ); + + // Prep eax/edx for producing the writeback result: + // equiv to: (cpuRegs.GPR.r[_Rt_].UL[0] >> SWL_SHIFT[shift]) | (mem & SWL_MASK[shift]) + + //_deleteEEreg(_Rt_, 1); + //MOV32MtoR( EDX, (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + + _eeMoveGPRtoR(EDX, _Rt_); + AND32ItoR( EAX, SWL_MASK[shift] ); + SHR32ItoR( EDX, SWL_SHIFT[shift] ); + OR32RtoR( EDX, EAX ); + + recStore( 32, true ); + } + else + { + _deleteEEreg(_Rs_, 1); + _deleteEEreg(_Rt_, 1); + MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); // pc's not needed by SWL + CALLFunc( (int)SWL ); + } } //////////////////////////////////////////////////// @@ -2587,29 +2302,32 @@ void recSWR( void ) _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); CALLFunc( (int)SWR ); } //////////////////////////////////////////////////// -void recSD( void ) +void recLDL( void ) { - recStore(64); - /* _deleteEEreg(_Rs_, 1); + _eeOnLoadWrite(_Rt_); + EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension _deleteEEreg(_Rt_, 1); - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } + MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); + CALLFunc( (int)LDL ); +} - PUSH32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ); - PUSH32M( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); 
- PUSH32R( EAX ); - CALLFunc( (int)memWrite64 ); - ADD32ItoR( ESP, 12 ); - */ +//////////////////////////////////////////////////// +void recLDR( void ) +{ + _deleteEEreg(_Rs_, 1); + _eeOnLoadWrite(_Rt_); + EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension + _deleteEEreg(_Rt_, 1); + MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); + CALLFunc( (int)LDR ); } //////////////////////////////////////////////////// @@ -2618,7 +2336,7 @@ void recSDL( void ) _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); CALLFunc( (int)SDL ); } @@ -2628,30 +2346,11 @@ void recSDR( void ) _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); MOV32ItoM( (int)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (int)&cpuRegs.pc, pc ); + //MOV32ItoM( (int)&cpuRegs.pc, pc ); CALLFunc( (int)SDR ); } -//////////////////////////////////////////////////// -void recSQ( void ) -{ - recStore(128); - /* - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - if ( _Imm_ != 0 ) - { - ADD32ItoR( EAX, _Imm_ ); - } - AND32ItoR( EAX, ~0xf ); - - PUSH32I( (int)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - PUSH32R( EAX ); - CALLFunc( (int)memWrite128 ); - ADD32ItoR( ESP, 8 );*/ -} - +////////////////////////////////////////////////////////////////////////////////////////// /********************************************************* * Load and store for COP1 * * Format: OP rt, offset(base) * @@ -2667,8 +2366,6 @@ void recLWC1( void ) if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ ); - //MOV32ItoR(EDX, (int)&fpuRegs.fpr[ _Rt_ ].UL ); //no 0 for fpu ? 
- //CALLFunc( (int)memRead32 ); vtlb_DynGenRead32(32, false); MOV32RtoM( (int)&fpuRegs.fpr[ _Rt_ ].UL, EAX ); } diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index 606eaed417..ae4f5829af 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -2344,6 +2344,22 @@ emitterT void eAND32MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } +// Warning: Untested form of AND. +emitterT void eAND32RmtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write8( 0x23 ); + ModRM( 0, to, from ); +} + +// Warning: Untested form of AND. +emitterT void eAND32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(0,to,from); + write8( 0x23 ); + WriteRmOffsetFrom(to,from,offset); +} + // and r16 to r16 emitterT void eAND16RtoR( x86IntRegType to, x86IntRegType from ) { diff --git a/pcsx2/x86/ix86/ix86_macros.h b/pcsx2/x86/ix86/ix86_macros.h index ea37192104..ab4ff0021e 100644 --- a/pcsx2/x86/ix86/ix86_macros.h +++ b/pcsx2/x86/ix86/ix86_macros.h @@ -274,6 +274,8 @@ #define AND32RtoR eAND32RtoR<_EmitterId_> #define AND32RtoM eAND32RtoM<_EmitterId_> #define AND32MtoR eAND32MtoR<_EmitterId_> +#define AND32RmtoR eAND32RmtoR<_EmitterId_> +#define AND32RmtoROffset eAND32RmtoROffset<_EmitterId_> #define AND16RtoR eAND16RtoR<_EmitterId_> #define AND16ItoR eAND16ItoR<_EmitterId_> #define AND16ItoM eAND16ItoM<_EmitterId_> From d60718e79d5d821fa1e52157cb4cc577fe9cf88d Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 2 Apr 2009 14:50:19 +0000 Subject: [PATCH 06/21] More vtlb optimizations: Switched over to full const resolution of the TLB, and added a shortcut for the INTC_STAT register (replacing the one rama added to HwRead.cpp a couple days ago). 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@884 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/HwRead.cpp | 16 ++-- pcsx2/x86/ix86-32/recVTLB.cpp | 159 +++++++++++++++++++++------------- 2 files changed, 105 insertions(+), 70 deletions(-) diff --git a/pcsx2/HwRead.cpp b/pcsx2/HwRead.cpp index 557a0dea46..cf86b7cbbb 100644 --- a/pcsx2/HwRead.cpp +++ b/pcsx2/HwRead.cpp @@ -214,6 +214,12 @@ static __forceinline mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) switch( mem ) { + case 0xf000: + if( intchack ) IntCHackCheck(); + // This one is checked alot, so leave it commented out unless you love 600 meg logfiles. + //HW_LOG("INTC_STAT Read 32bit %x", psHu32(0xf010)); + break; + case 0xf010: HW_LOG("INTC_MASK Read32, value=0x%x", psHu32(INTC_MASK)); break; @@ -255,22 +261,12 @@ static __forceinline mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) } } return 0; - - case 0xf000: - //Put this back on top in case you remove the shortcut for intc_stat register (see below function) (rama). - if( intchack ) IntCHackCheck(); - // This one is checked alot, so leave it commented out unless you love 600 meg logfiles. - //HW_LOG("INTC_STAT Read 32bit %x", psHu32(0xf010)); - break; } return *((u32*)&PS2MEM_HW[mem]); } mem32_t __fastcall hwRead32_page_0F(u32 mem) { - if (mem == 0x1000f000) //shortcut for intc_stat - return *((u32*)&PS2MEM_HW[0xF000]); - return __hwRead32_page_0F( mem, false ); } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 9802718068..5e6e993c86 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -239,64 +239,93 @@ void vtlb_DynGenRead32(u32 bits, bool sign) x86SetJ8(cont); } +// +// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the +// recompiler if the TLB is changed. 
void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { - jASSUME( bits == 64 || bits == 128 ); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; + s32 ppf = addr_const + vmv_ptr; + if( ppf >= 0 ) + { + MOV32ItoR( ECX, ppf ); + _vtlb_DynGen_DirectRead( bits, false ); + } + else + { + // has to: translate, find function, call function + u32 handler = (u8)vmv_ptr; + u32 paddr = ppf - handler + 0x80000000; - void* vmv_ptr = &vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; + int szidx = 0; + switch( bits ) + { + case 64: szidx=3; break; + case 128: szidx=4; break; + } - MOV32MtoR(EAX,(uptr)vmv_ptr); - MOV32ItoR(ECX,addr_const); - ADD32RtoR(ECX,EAX); // ecx=ppf - u8* _fullread = JS8(0); - - _vtlb_DynGen_DirectRead( bits, false ); - u8* cont = JMP8(0); - - x86SetJ8(_fullread); - _vtlb_DynGen_IndirectRead( bits ); - - x86SetJ8(cont); + MOV32ItoR( ECX, paddr ); + CALLFunc( (int)vtlbdata.RWFT[szidx][0][handler] ); + } } // Recompiled input registers: // ecx - source address to read from // Returns read value in eax. +// +// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the +// recompiler if the TLB is changed. 
void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) { - jASSUME( bits <= 32 ); - - void* vmv_ptr = &vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; - - MOV32MtoR(EAX,(uptr)vmv_ptr); - MOV32ItoR(ECX,addr_const); - ADD32RtoR(ECX,EAX); // ecx=ppf - u8* _fullread = JS8(0); - - _vtlb_DynGen_DirectRead( bits, sign ); - u8* cont = JMP8(0); - - x86SetJ8(_fullread); - _vtlb_DynGen_IndirectRead( bits ); - - // perform sign extension on the result: - - if( bits==8 ) + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; + s32 ppf = addr_const + vmv_ptr; + if( ppf >= 0 ) { - if( sign ) - MOVSX32R8toR(EAX,EAX); - else - MOVZX32R8toR(EAX,EAX); + MOV32ItoR( ECX, ppf ); + _vtlb_DynGen_DirectRead( bits, sign ); } - else if( bits==16 ) + else { - if( sign ) - MOVSX32R16toR(EAX,EAX); - else - MOVZX32R16toR(EAX,EAX); - } + // has to: translate, find function, call function + u32 handler = (u8)vmv_ptr; + u32 paddr = ppf - handler + 0x80000000; + + int szidx = 0; + switch( bits ) + { + case 8: szidx=0; break; + case 16: szidx=1; break; + case 32: szidx=2; break; + } - x86SetJ8(cont); + // Shortcut for the INTC_STAT register, which many games like to spin on heavily. + if( (bits == 32) && !CHECK_INTC_STAT_HACK && (paddr == INTC_STAT) ) + { + MOV32MtoR( EAX, (uptr)&psHu32( INTC_STAT ) ); + } + else + { + MOV32ItoR( ECX, paddr ); + CALLFunc( (int)vtlbdata.RWFT[szidx][0][handler] ); + + // perform sign extension on the result: + + if( bits==8 ) + { + if( sign ) + MOVSX32R8toR(EAX,EAX); + else + MOVZX32R8toR(EAX,EAX); + } + else if( bits==16 ) + { + if( sign ) + MOVSX32R16toR(EAX,EAX); + else + MOVZX32R16toR(EAX,EAX); + } + } + } } ////////////////////////////////////////////////////////////////////////////////////////// @@ -392,24 +421,34 @@ void vtlb_DynGenWrite(u32 sz) // Generates code for a store instruction, where the address is a known constant. 
+// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the +// recompiler if the TLB is changed. void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) { - // Important: It's not technically safe to do a const lookup of the VTLB here, since - // the VTLB could feasibly be remapped by other recompiled code at any time. - // So we're limited in exactly how much we can pre-calcuate. + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; + s32 ppf = addr_const + vmv_ptr; + if( ppf >= 0 ) + { + MOV32ItoR( ECX, ppf ); + _vtlb_DynGen_DirectWrite( bits ); + } + else + { + // has to: translate, find function, call function + u32 handler = (u8)vmv_ptr; + u32 paddr = ppf - handler + 0x80000000; + + int szidx = 0; + switch( bits ) + { + case 8: szidx=0; break; + case 16: szidx=1; break; + case 32: szidx=2; break; + case 64: szidx=3; break; + case 128: szidx=4; break; + } - void* vmv_ptr = &vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; - - MOV32MtoR(EAX,(uptr)vmv_ptr); - MOV32ItoR(ECX,addr_const); - ADD32RtoR(ECX,EAX); // ecx=ppf - u8* _full = JS8(0); - - _vtlb_DynGen_DirectWrite( bits ); - u8* cont = JMP8(0); - - x86SetJ8(_full); - _vtlb_DynGen_IndirectWrite( bits ); - - x86SetJ8(cont); + MOV32ItoR( ECX, paddr ); + CALLFunc( (int)vtlbdata.RWFT[szidx][1][handler] ); + } } From a8d9cbc25d8664d34b98581b3b5ae9cea5cea2c0 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 2 Apr 2009 15:17:47 +0000 Subject: [PATCH 07/21] Final pass of today's vtlb optimizations: Improved the codegen for const-propagated direct reads and writes (very minor optimization). 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@885 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86-32/recVTLB.cpp | 116 ++++++++++++++++++++++++++++++++-- pcsx2/x86/ix86/ix86_macros.h | 2 +- 2 files changed, 111 insertions(+), 7 deletions(-) diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 5e6e993c86..cbc567c68d 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -248,8 +248,46 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) { - MOV32ItoR( ECX, ppf ); - _vtlb_DynGen_DirectRead( bits, false ); + switch( bits ) + { + case 64: + if( _hasFreeMMXreg() ) + { + const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); + MOVQMtoR(freereg,ppf); + MOVQRtoRmOffset(EDX,freereg,0); + _freeMMXreg(freereg); + } + else + { + MOV32MtoR(EAX,ppf); + MOV32RtoRm(EDX,EAX); + + MOV32MtoR(EAX,ppf+4); + MOV32RtoRmOffset(EDX,EAX,4); + } + break; + + case 128: + if( _hasFreeXMMreg() ) + { + const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); + SSE2_MOVDQA_M128_to_XMM( freereg, ppf ); + SSE2_MOVDQARtoRmOffset(EDX,freereg,0); + _freeXMMreg(freereg); + } + else + { + // Could put in an MMX optimization here as well, but no point really. + // It's almost never used since there's almost always a free XMM reg. + + MOV32ItoR( ECX, ppf ); + MOV128_MtoM( EDX, ECX ); // dest <- src! 
+ } + break; + + jNO_DEFAULT + } } else { @@ -281,8 +319,26 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) { - MOV32ItoR( ECX, ppf ); - _vtlb_DynGen_DirectRead( bits, sign ); + switch( bits ) + { + case 8: + if( sign ) + MOVSX32M8toR(EAX,ppf); + else + MOVZX32M8toR(EAX,ppf); + break; + + case 16: + if( sign ) + MOVSX32M16toR(EAX,ppf); + else + MOVZX32M16toR(EAX,ppf); + break; + + case 32: + MOV32MtoR(EAX,ppf); + break; + } } else { @@ -429,8 +485,56 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) { - MOV32ItoR( ECX, ppf ); - _vtlb_DynGen_DirectWrite( bits ); + switch(bits) + { + //8 , 16, 32 : data on EDX + case 8: + MOV8RtoM(ppf,EDX); + break; + case 16: + MOV16RtoM(ppf,EDX); + break; + case 32: + MOV32RtoM(ppf,EDX); + break; + + case 64: + if( _hasFreeMMXreg() ) + { + const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); + MOVQRmtoROffset(freereg,EDX,0); + MOVQRtoM(ppf,freereg); + _freeMMXreg( freereg ); + } + else + { + MOV32RmtoR(EAX,EDX); + MOV32RtoM(ppf,EAX); + + MOV32RmtoROffset(EAX,EDX,4); + MOV32RtoM(ppf+4,EAX); + } + break; + + case 128: + if( _hasFreeXMMreg() ) + { + const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); + SSE2_MOVDQARmtoROffset(freereg,EDX,0); + SSE2_MOVDQA_XMM_to_M128(ppf,freereg); + _freeXMMreg( freereg ); + } + else + { + // Could put in an MMX optimization here as well, but no point really. + // It's almost never used since there's almost always a free XMM reg. + + MOV32ItoR( ECX, ppf ); + MOV128_MtoM( ECX, EDX ); // dest <- src! 
+ } + break; + } + } else { diff --git a/pcsx2/x86/ix86/ix86_macros.h b/pcsx2/x86/ix86/ix86_macros.h index ab4ff0021e..2ad9cdfe47 100644 --- a/pcsx2/x86/ix86/ix86_macros.h +++ b/pcsx2/x86/ix86/ix86_macros.h @@ -708,7 +708,7 @@ #define SSE2_MOVDQA_XMM_to_XMM eSSE2_MOVDQA_XMM_to_XMM<_EmitterId_> #define SSE2_MOVDQU_M128_to_XMM eSSE2_MOVDQU_M128_to_XMM<_EmitterId_> #define SSE2_MOVDQU_XMM_to_M128 eSSE2_MOVDQU_XMM_to_M128<_EmitterId_> -#define SSE2_MOVDQU_XMM_to_XMM eSSE2_MOVDQU_XMM_to_XMM<_EmitterId_> +#define SSE2_MOVDQU_XMM_to_XMM eSSE2_MOVDQA_XMM_to_XMM<_EmitterId_> #define SSE2_PSRLW_XMM_to_XMM eSSE2_PSRLW_XMM_to_XMM<_EmitterId_> #define SSE2_PSRLW_M128_to_XMM eSSE2_PSRLW_M128_to_XMM<_EmitterId_> #define SSE2_PSRLW_I8_to_XMM eSSE2_PSRLW_I8_to_XMM<_EmitterId_> From 2caebe0069f323549b981da548a3fd03b827b0e9 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Thu, 2 Apr 2009 21:04:45 +0000 Subject: [PATCH 08/21] T/D flag interrupting was missing on the VUs. Nneeve implemented it :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@886 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VU0microInterp.cpp | 2 ++ pcsx2/VU1microInterp.cpp | 2 ++ pcsx2/x86/iVUmicro.cpp | 6 +++++- pcsx2/x86/iVUzerorec.cpp | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index cf70894bd1..eaf7b9b05f 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -76,12 +76,14 @@ static void _vu0Exec(VURegs* VU) VU0.VI[REG_VPU_STAT].UL|= 0x2; hwIntcIrq(INTC_VU0); } + VU->ebit = 1; } if (ptr[1] & 0x08000000) { /* T flag */ if (VU0.VI[REG_FBRST].UL & 0x8) { VU0.VI[REG_VPU_STAT].UL|= 0x4; hwIntcIrq(INTC_VU0); } + VU->ebit = 1; } VU->code = ptr[1]; diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index fc8f7f793a..fe2028475e 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -71,12 +71,14 @@ static void _vu1Exec(VURegs* VU) VU0.VI[REG_VPU_STAT].UL|= 0x200; hwIntcIrq(INTC_VU1); } + VU->ebit 
= 1; } if (ptr[1] & 0x08000000) { /* T flag */ if (VU0.VI[REG_FBRST].UL & 0x800) { VU0.VI[REG_VPU_STAT].UL|= 0x400; hwIntcIrq(INTC_VU1); } + VU->ebit = 1; } VUM_LOG("VU->cycle = %d (flags st=%x;mac=%x;clip=%x,q=%f)", VU->cycle, VU->statusflag, VU->macflag, VU->clipflag, VU->q.F); diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index 3e24cd6930..e4ff05e8f9 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -474,7 +474,11 @@ void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs) if (ptr[1] & 0x40000000) { // EOP branch |= 8; } - + + if (ptr[1] & 0x18000000) { // T/D flags + branch |= 16 | 8; //stop right after this instruction + } + VU->code = ptr[1]; if (VU == &VU1) VU1regs_UPPER_OPCODE[VU->code & 0x3f](uregs); else VU0regs_UPPER_OPCODE[VU->code & 0x3f](uregs); diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index db9ff78fbb..4337b2c607 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -1246,7 +1246,7 @@ static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const V if( pinst->info.p&VUOP_WRITE ) pinst->pqcycles = PWaitTimes[pinst->info.pqinst]+1; - if( prevbranch ) { + if( prevbranch || (branch & 16)) { break; } From 35655edb47323d922bef90ebf269fcfa4dff4c5a Mon Sep 17 00:00:00 2001 From: gabest11 Date: Thu, 2 Apr 2009 21:22:37 +0000 Subject: [PATCH 09/21] GSdx: the BoF5 speed fix git-svn-id: http://pcsx2.googlecode.com/svn/trunk@887 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSState.cpp | 50 ++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 6e546a9bcc..ff17150ec4 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -1139,9 +1139,6 @@ void GSState::Move() // ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect // guitar hero copies the far 
end of the board to do a similar blend too - GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp; - GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp; - int sx = m_env.TRXPOS.SSAX; int dx = m_env.TRXPOS.DSAX; int sy = m_env.TRXPOS.SSAY; @@ -1151,17 +1148,56 @@ void GSState::Move() int xinc = 1; int yinc = 1; - if(sx < dx) sx += w-1, dx += w-1, xinc = -1; - if(sy < dy) sy += h-1, dy += h-1, yinc = -1; - InvalidateLocalMem(m_env.BITBLTBUF, CRect(CPoint(sx, sy), CSize(w, h))); InvalidateVideoMem(m_env.BITBLTBUF, CRect(CPoint(dx, dy), CSize(w, h))); - // TODO: use rowOffset + if(sx < dx) sx += w-1, dx += w-1, xinc = -1; + if(sy < dy) sy += h-1, dy += h-1, yinc = -1; + +/* + GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp; + GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp; for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) for(int x = 0; x < w; x++, sx += xinc, dx += xinc) (m_mem.*wp)(dx, dy, (m_mem.*rp)(sx, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW), m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); +*/ + + const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM]; + const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM]; + + if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32) + { + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) + { + DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + int* soffset = spsm.rowOffset[sy & 7]; + + DWORD dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + int* doffset = dpsm.rowOffset[dy & 7]; + + for(int x = 0; x < w; x++, sx += xinc, dx += xinc) + { + m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx])); + } + } + } + else + { + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) + { + DWORD sbase = spsm.pa(0, sy, 
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + int* soffset = spsm.rowOffset[sy & 7]; + + DWORD dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + int* doffset = dpsm.rowOffset[dy & 7]; + + for(int x = 0; x < w; x++, sx += xinc, dx += xinc) + { + (m_mem.*dpsm.wpa)(dbase + doffset[dx], (m_mem.*spsm.rpa)(sbase + soffset[sx])); + } + } + } } void GSState::SoftReset(BYTE mask) From e39dec6ddb6a8b72c670e12e4224bb363d6838fb Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Thu, 2 Apr 2009 22:01:16 +0000 Subject: [PATCH 10/21] Oh well, a problem with the T/D flag code made the bios screw up. Since it can't be fixed without further code in superVU, full revert for now :/ git-svn-id: http://pcsx2.googlecode.com/svn/trunk@888 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VU0microInterp.cpp | 2 -- pcsx2/VU1microInterp.cpp | 2 -- pcsx2/x86/iVUmicro.cpp | 4 ---- pcsx2/x86/iVUzerorec.cpp | 2 +- 4 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pcsx2/VU0microInterp.cpp b/pcsx2/VU0microInterp.cpp index eaf7b9b05f..cf70894bd1 100644 --- a/pcsx2/VU0microInterp.cpp +++ b/pcsx2/VU0microInterp.cpp @@ -76,14 +76,12 @@ static void _vu0Exec(VURegs* VU) VU0.VI[REG_VPU_STAT].UL|= 0x2; hwIntcIrq(INTC_VU0); } - VU->ebit = 1; } if (ptr[1] & 0x08000000) { /* T flag */ if (VU0.VI[REG_FBRST].UL & 0x8) { VU0.VI[REG_VPU_STAT].UL|= 0x4; hwIntcIrq(INTC_VU0); } - VU->ebit = 1; } VU->code = ptr[1]; diff --git a/pcsx2/VU1microInterp.cpp b/pcsx2/VU1microInterp.cpp index fe2028475e..fc8f7f793a 100644 --- a/pcsx2/VU1microInterp.cpp +++ b/pcsx2/VU1microInterp.cpp @@ -71,14 +71,12 @@ static void _vu1Exec(VURegs* VU) VU0.VI[REG_VPU_STAT].UL|= 0x200; hwIntcIrq(INTC_VU1); } - VU->ebit = 1; } if (ptr[1] & 0x08000000) { /* T flag */ if (VU0.VI[REG_FBRST].UL & 0x800) { VU0.VI[REG_VPU_STAT].UL|= 0x400; hwIntcIrq(INTC_VU1); } - VU->ebit = 1; } VUM_LOG("VU->cycle = %d (flags st=%x;mac=%x;clip=%x,q=%f)", VU->cycle, VU->statusflag, VU->macflag, VU->clipflag, VU->q.F); diff --git 
a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index e4ff05e8f9..2422e5ad10 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -473,10 +473,6 @@ void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs) if (ptr[1] & 0x40000000) { // EOP branch |= 8; - } - - if (ptr[1] & 0x18000000) { // T/D flags - branch |= 16 | 8; //stop right after this instruction } VU->code = ptr[1]; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 4337b2c607..db9ff78fbb 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -1246,7 +1246,7 @@ static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const V if( pinst->info.p&VUOP_WRITE ) pinst->pqcycles = PWaitTimes[pinst->info.pqinst]+1; - if( prevbranch || (branch & 16)) { + if( prevbranch ) { break; } From 3b570f8a2bb37379204744d347f70da10940bc4f Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 2 Apr 2009 22:22:02 +0000 Subject: [PATCH 11/21] Fixed a bug from an earlier vtlb commit that caused some slowdown when INTC_HACK was disabled. Turns out games spinning on INTC_STAT don't do it in a way that allows the recompiler to propagate consts. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@889 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/HwRead.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pcsx2/HwRead.cpp b/pcsx2/HwRead.cpp index cf86b7cbbb..bedf07fc46 100644 --- a/pcsx2/HwRead.cpp +++ b/pcsx2/HwRead.cpp @@ -212,14 +212,20 @@ static __forceinline mem32_t __hwRead32_page_0F( u32 mem, bool intchack ) mem &= 0xffff; + // INTC_STAT shortcut for heavy spinning. + // Performance Note: Visual Studio handles this best if we just manually check for it here, + // outside the context of the switch statement below. This is likely fixed by PGO also, + // but it's an easy enough conditional to account for anyways. 
+ + static const uint ics = INTC_STAT & 0xffff; + if( mem == ics ) // INTC_STAT + { + if( intchack ) IntCHackCheck(); + return *((u32*)&PS2MEM_HW[ics]); + } + switch( mem ) { - case 0xf000: - if( intchack ) IntCHackCheck(); - // This one is checked alot, so leave it commented out unless you love 600 meg logfiles. - //HW_LOG("INTC_STAT Read 32bit %x", psHu32(0xf010)); - break; - case 0xf010: HW_LOG("INTC_MASK Read32, value=0x%x", psHu32(INTC_MASK)); break; From b44d1590cad4ad4d561cdf119ebeb0820a8e8862 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 3 Apr 2009 01:05:28 +0000 Subject: [PATCH 12/21] Fiddle with ProcessFKeys a bit, and hack in a key to turn logging on and off(F10). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@891 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/DebugTools/Debug.h | 1 + pcsx2/Linux/LnxSysExec.cpp | 190 +++++++++++++++++------------------ pcsx2/Misc.cpp | 57 +++++++---- pcsx2/Misc.h | 11 +- pcsx2/SourceLog.cpp | 9 +- pcsx2/windows/WinSysExec.cpp | 18 +++- 6 files changed, 165 insertions(+), 121 deletions(-) diff --git a/pcsx2/DebugTools/Debug.h b/pcsx2/DebugTools/Debug.h index 77152441df..95aed6fe99 100644 --- a/pcsx2/DebugTools/Debug.h +++ b/pcsx2/DebugTools/Debug.h @@ -74,6 +74,7 @@ namespace R3000A #ifdef PCSX2_DEVBUILD extern u32 varLog; +extern bool enableLogging; void SourceLog( u16 protocol, u8 source, u32 cpuPc, u32 cpuCycle, const char *fmt, ...); void __Log( const char* fmt, ... ); diff --git a/pcsx2/Linux/LnxSysExec.cpp b/pcsx2/Linux/LnxSysExec.cpp index 0080dff9bc..7a437df3c1 100644 --- a/pcsx2/Linux/LnxSysExec.cpp +++ b/pcsx2/Linux/LnxSysExec.cpp @@ -291,12 +291,6 @@ void OnStates_SaveOther(GtkMenuItem *menuitem, gpointer user_data) gdk_window_raise(FileSel->window); } -/* Quick macros for checking shift, control, alt, and caps lock. 
*/ -#define SHIFT_EVT(evt) ((evt == XK_Shift_L) || (evt == XK_Shift_R)) -#define CTRL_EVT(evt) ((evt == XK_Control_L) || (evt == XK_Control_L)) -#define ALT_EVT(evt) ((evt == XK_Alt_L) || (evt == XK_Alt_R)) -#define CAPS_LOCK_EVT(evt) (evt == XK_Caps_Lock) - bool SysInit() { if (sinit) return true; @@ -450,98 +444,100 @@ namespace HostGui SysExecute(); } - void __fastcall KeyEvent(keyEvent* ev) +/* Quick macros for checking shift, control, alt, and caps lock. */ +#define SHIFT_EVT(evt) ((evt == XK_Shift_L) || (evt == XK_Shift_R)) +#define CTRL_EVT(evt) ((evt == XK_Control_L) || (evt == XK_Control_R)) +#define ALT_EVT(evt) ((evt == XK_Alt_L) || (evt == XK_Alt_R)) +#define CAPS_LOCK_EVT(evt) (evt == XK_Caps_Lock) + +void __fastcall KeyEvent(keyEvent* ev) +{ + struct KeyModifiers *keymod = &keymodifiers; + + if (ev == NULL) return; + + if (ev->evt == KEYRELEASE) { - static int shift = 0; - - if (ev == NULL) return; - - if (GSkeyEvent != NULL) GSkeyEvent(ev); - - if (ev->evt == KEYPRESS) - { - if (SHIFT_EVT(ev->key)) - shift = 1; - if (CAPS_LOCK_EVT(ev->key)) - { - //Set up anything we want to happen while caps lock is down. - } - - switch (ev->key) - { - case XK_F1: - case XK_F2: - case XK_F3: - case XK_F4: - case XK_F5: - case XK_F6: - case XK_F7: - case XK_F8: - case XK_F9: - case XK_F10: - case XK_F11: - case XK_F12: - try - { - ProcessFKeys(ev->key - XK_F1 + 1, shift); - } - catch (Exception::CpuStateShutdown&) - { - // Woops! Something was unrecoverable. Bummer. - // Let's give the user a RunGui! - - g_EmulationInProgress = false; - SysEndExecution(); - } - break; - - case XK_Tab: - CycleFrameLimit(0); - break; - - case XK_Escape: - signal(SIGINT, SIG_DFL); - signal(SIGPIPE, SIG_DFL); - - #ifdef PCSX2_DEVBUILD - if (g_SaveGSStream >= 3) - { - g_SaveGSStream = 4;// gs state - break; - } - #endif - SysEndExecution(); - - if (g_Startup.NoGui) exit(0); - - // fixme: The GUI is now capable of receiving control back from the - // emulator. 
Which means that when we call SysEscapeExecute() here, the - // emulation loop in ExecuteCpu() will exit. You should be able to set it - // up so that it returns control to the existing GTK event loop, instead of - // always starting a new one via RunGui(). (but could take some trial and - // error) -- (air) - - // Easier said then done; running gtk in two threads at the same time can't be - // done, and working around that is pretty fiddly. - RunGui(); - break; - - default: - GSkeyEvent(ev); - break; - } - } - else if (ev->evt == KEYRELEASE) - { - if (SHIFT_EVT(ev->key)) - shift = 0; - if (CAPS_LOCK_EVT(ev->key)) - { - //Release caps lock - } - } - + if (SHIFT_EVT(ev->key)) keymod->shift = FALSE; + if (CTRL_EVT(ev->key)) keymod->control = FALSE; + if (ALT_EVT(ev->key)) keymod->alt = FALSE; + if (CAPS_LOCK_EVT(ev->key)) keymod->capslock = FALSE; + GSkeyEvent(ev); return; } -} \ No newline at end of file + if (ev->evt == KEYPRESS) + { + if (SHIFT_EVT(ev->key)) keymod->shift = TRUE; + if (CTRL_EVT(ev->key)) keymod->control = TRUE; + if (ALT_EVT(ev->key)) keymod->alt = TRUE; + if (CAPS_LOCK_EVT(ev->key)) keymod->capslock = TRUE; + + switch (ev->key) + { + case XK_F1: + case XK_F2: + case XK_F3: + case XK_F4: + case XK_F5: + case XK_F6: + case XK_F7: + case XK_F8: + case XK_F9: + case XK_F10: + case XK_F11: + case XK_F12: + try + { + ProcessFKeys(ev->key - XK_F1 + 1, keymod); + } + catch (Exception::CpuStateShutdown&) + { + // Woops! Something was unrecoverable. Bummer. + // Let's give the user a RunGui! + + g_EmulationInProgress = false; + SysEndExecution(); + } + break; + + case XK_Tab: + CycleFrameLimit(0); + break; + + case XK_Escape: + signal(SIGINT, SIG_DFL); + signal(SIGPIPE, SIG_DFL); + +#ifdef PCSX2_DEVBUILD + if (g_SaveGSStream >= 3) + { + g_SaveGSStream = 4;// gs state + break; + } +#endif + SysEndExecution(); + + if (g_Startup.NoGui) exit(0); + + // fixme: The GUI is now capable of receiving control back from the + // emulator. 
Which means that when we call SysEscapeExecute() here, the + // emulation loop in ExecuteCpu() will exit. You should be able to set it + // up so that it returns control to the existing GTK event loop, instead of + // always starting a new one via RunGui(). (but could take some trial and + // error) -- (air) + + // Easier said then done; running gtk in two threads at the same time can't be + // done, and working around that is pretty fiddly. + RunGui(); + break; + + default: + GSkeyEvent(ev); + break; + } + } + + return; +} +} diff --git a/pcsx2/Misc.cpp b/pcsx2/Misc.cpp index 738550ad32..626830295f 100644 --- a/pcsx2/Misc.cpp +++ b/pcsx2/Misc.cpp @@ -51,6 +51,8 @@ char CdromId[12]; static int g_Pcsx2Recording = 0; // true 1 if recording video and sound bool renderswitch = 0; +struct KeyModifiers keymodifiers = {false, false, false, false}; + #define NUM_STATES 10 int StatesC = 0; @@ -489,11 +491,12 @@ void CycleFrameLimit(int dir) //SaveConfig(); } -void ProcessFKeys(int fkey, int shift) +void ProcessFKeys(int fkey, struct KeyModifiers *keymod) { assert(fkey >= 1 && fkey <= 12 ); - switch(fkey) { + switch(fkey) + { case 1: try { @@ -511,7 +514,7 @@ void ProcessFKeys(int fkey, int shift) break; case 2: - if( shift ) + if( keymod->shift ) StatesC = (StatesC+NUM_STATES-1) % NUM_STATES; else StatesC = (StatesC+1) % NUM_STATES; @@ -558,7 +561,7 @@ void ProcessFKeys(int fkey, int shift) break; case 4: - CycleFrameLimit(shift ? -1 : 1); + CycleFrameLimit(keymod->shift ? 
-1 : 1); break; // note: VK_F5-VK_F7 are reserved for GS @@ -567,7 +570,8 @@ void ProcessFKeys(int fkey, int shift) break; case 9: //gsdx "on the fly" renderer switching - if (!renderswitch) { + if (!renderswitch) + { StateRecovery::MakeGsOnly(); g_EmulationInProgress = false; CloseGS(); @@ -575,7 +579,8 @@ void ProcessFKeys(int fkey, int shift) StateRecovery::Recover(); HostGui::BeginExecution(); //also sets g_EmulationInProgress to true later } - else { + else + { StateRecovery::MakeGsOnly(); g_EmulationInProgress = false; CloseGS(); @@ -585,21 +590,35 @@ void ProcessFKeys(int fkey, int shift) } break; #ifdef PCSX2_DEVBUILD - + case 10: + // There's likely a better way to implement this, but this seemed useful. + // I might add turning EE, VU0, and VU1 recs on and off by hotkey at some point, too. + // --arcum42 + enableLogging = !enableLogging; + + if (enableLogging) + GSprintf(10, "Logging Enabled."); + else + GSprintf(10,"Logging Disabled."); + + break; case 11: - if( mtgsThread != NULL ) { + if( mtgsThread != NULL ) + { Console::Notice( "Cannot make gsstates in MTGS mode" ); } else { string Text; - if( strgametitle[0] != 0 ) { + if( strgametitle[0] != 0 ) + { // only take the first two words char name[256], *tok; string gsText; tok = strtok(strgametitle, " "); sprintf(name, "%s_", mystrlwr(tok)); + tok = strtok(NULL, " "); if( tok != NULL ) strcat(name, tok); @@ -607,28 +626,32 @@ void ProcessFKeys(int fkey, int shift) Text = Path::Combine( SSTATES_DIR, gsText ); } else + { Text = GetGSStateFilename(); - + } + SaveGSState(Text); } break; #endif case 12: - if( shift ) { + if( keymod->shift ) + { #ifdef PCSX2_DEVBUILD iDumpRegisters(cpuRegs.pc, 0); Console::Notice("hardware registers dumped EE:%x, IOP:%x\n", params cpuRegs.pc, psxRegs.pc); #endif } - else { + else + { g_Pcsx2Recording ^= 1; - if( mtgsThread != NULL ) { + + if( mtgsThread != NULL ) mtgsThread->SendSimplePacket(GS_RINGTYPE_RECORD, g_Pcsx2Recording, 0, 0); - } - else { - if( GSsetupRecording != 
NULL ) GSsetupRecording(g_Pcsx2Recording, NULL); - } + else if( GSsetupRecording != NULL ) + GSsetupRecording(g_Pcsx2Recording, NULL); + if( SPU2setupRecording != NULL ) SPU2setupRecording(g_Pcsx2Recording, NULL); } break; diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index d90a1cf020..7eb69a3498 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -19,6 +19,15 @@ #ifndef __MISC_H__ #define __MISC_H__ +struct KeyModifiers +{ + bool control; + bool alt; + bool shift; + bool capslock; +}; +extern struct KeyModifiers keymodifiers; + // Per ChickenLiver, this is being used to pass the GS plugins window handle to the Pad plugins. // So a rename to pDisplay is in the works, but it will not, in fact, be removed. extern uptr pDsp; //Used in GS, MTGS, Plugins, Misc @@ -55,7 +64,7 @@ extern u64 GetCPUTicks(); extern u64 GetTickFrequency(); // Used in Misc,and Windows/Linux files. -extern void ProcessFKeys(int fkey, int shift); // processes fkey related commands value 1-12 +extern void ProcessFKeys(int fkey, struct KeyModifiers *keymod); // processes fkey related commands value 1-12 extern int IsBIOS(const char *filename, char *description); char *ParseLang(char *id); diff --git a/pcsx2/SourceLog.cpp b/pcsx2/SourceLog.cpp index 66b4b037c1..29a42795ed 100644 --- a/pcsx2/SourceLog.cpp +++ b/pcsx2/SourceLog.cpp @@ -38,6 +38,7 @@ FILE *emuLog; #ifdef PCSX2_DEVBUILD u32 varLog; +bool enableLogging = TRUE; // these used by the depreciated _old_Log only u16 logProtocol; @@ -52,8 +53,10 @@ int connected=0; void __Log( const char* fmt, ... ) { char tmp[2024]; - va_list list; + + if (!enableLogging) return; + va_start(list, fmt); // concatenate the log message after the prefix: @@ -123,6 +126,9 @@ static __forceinline void _vSourceLog( u16 protocol, u8 source, u32 cpuPc, u32 c void SourceLog( u16 protocol, u8 source, u32 cpuPc, u32 cpuCycle, const char *fmt, ...) 
{ va_list list; + + if (!enableLogging) return; + va_start(list, fmt); _vSourceLog( protocol, source, cpuPc, cpuCycle, fmt, list ); va_end(list); @@ -133,6 +139,7 @@ void SourceLog( u16 protocol, u8 source, u32 cpuPc, u32 cpuCycle, const char *fm bool SrcLog_##unit( const char* fmt, ... ) \ { \ va_list list; \ + if (!enableLogging) return false; \ va_start( list, fmt ); \ _vSourceLog( protocol, source, \ (source == 'E') ? cpuRegs.pc : psxRegs.pc, \ diff --git a/pcsx2/windows/WinSysExec.cpp b/pcsx2/windows/WinSysExec.cpp index f797094183..ec62fe33d0 100644 --- a/pcsx2/windows/WinSysExec.cpp +++ b/pcsx2/windows/WinSysExec.cpp @@ -386,14 +386,19 @@ namespace HostGui void __fastcall KeyEvent( keyEvent* ev ) { - static int shiftkey = 0; + struct KeyModifiers *keymod = &keymodifiers; if (ev == NULL) return; if (ev->evt == KEYRELEASE) { switch (ev->key) { - case VK_SHIFT: shiftkey = 0; break; + case VK_SHIFT: keymod->shift = FALSE; break; + case VK_CONTROL: keymod->control = FALSE; break; + /* They couldn't just name this something simple, like VK_ALT */ + case VK_MENU: keymod->alt = FALSE; break; + case VK_CAPITAL: keymod->capslock = FALSE; break; + } GSkeyEvent(ev); return; } @@ -402,14 +407,17 @@ namespace HostGui switch (ev->key) { - case VK_SHIFT: shiftkey = 1; break; - + case VK_SHIFT: keymod->shift = TRUE; break; + case VK_CONTROL: keymod->control = TRUE; break; + case VK_MENU: keymod->alt = TRUE; break; + case VK_CAPITAL: keymod->capslock = TRUE; break; + case VK_F1: case VK_F2: case VK_F3: case VK_F4: case VK_F5: case VK_F6: case VK_F7: case VK_F8: case VK_F9: case VK_F10: case VK_F11: case VK_F12: try { - ProcessFKeys(ev->key-VK_F1 + 1, shiftkey); + ProcessFKeys(ev->key-VK_F1 + 1, keymod); } catch( Exception::CpuStateShutdown& ) { From c2d3f78ee12b266e6b79b48986c3047a0d55ffe2 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 3 Apr 2009 02:43:49 +0000 Subject: [PATCH 13/21] Patch r891 so the release build builds. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@892 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/DebugTools/Debug.h | 1 + pcsx2/SourceLog.cpp | 2 +- pcsx2/System.h | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/DebugTools/Debug.h b/pcsx2/DebugTools/Debug.h index 95aed6fe99..1e42cea714 100644 --- a/pcsx2/DebugTools/Debug.h +++ b/pcsx2/DebugTools/Debug.h @@ -71,6 +71,7 @@ namespace R3000A extern char* disR3000AF(u32 code, u32 pc); } +extern bool enableLogging; #ifdef PCSX2_DEVBUILD extern u32 varLog; diff --git a/pcsx2/SourceLog.cpp b/pcsx2/SourceLog.cpp index 29a42795ed..1a51490e9a 100644 --- a/pcsx2/SourceLog.cpp +++ b/pcsx2/SourceLog.cpp @@ -38,13 +38,13 @@ FILE *emuLog; #ifdef PCSX2_DEVBUILD u32 varLog; -bool enableLogging = TRUE; // these used by the depreciated _old_Log only u16 logProtocol; u8 logSource; #endif +bool enableLogging = TRUE; int connected=0; #define SYNC_LOGGING diff --git a/pcsx2/System.h b/pcsx2/System.h index 1c51e8352b..80c7516749 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -28,7 +28,6 @@ #include "Misc.h" #include "Threading.h" // to use threading stuff, include the Threading namespace in your file. - enum PageProtectionMode { Protect_NoAccess = 0, From 33d7e6e4f4ff921f249d0aaca3f02036746f3a9d Mon Sep 17 00:00:00 2001 From: mattmenke Date: Fri, 3 Apr 2009 11:17:33 +0000 Subject: [PATCH 14/21] Multitap should now work in BIOS (Oops). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@893 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Sio.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index 333ba5d85c..16054ff966 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -460,10 +460,9 @@ void SIO_CommandWrite(u8 value,int way) { sio.bufcount = 6; // No idea why this is 6, saved from old code. break; } - // These were taken from old code. No idea if they're needed. - // Don't seem to break anything, at least. 
- sio.buf[sio.bufcount-1]='+'; - sio.buf[sio.bufcount]='Z'; + // Commented out values are from original code. Break multitap in bios.. + sio.buf[sio.bufcount-1]=0;//'+'; + sio.buf[sio.bufcount]=0;//'Z'; return; case 0x2: sio.packetsize++; @@ -626,7 +625,7 @@ void InitializeSIO(u8 value) const int mcidx = sio.GetMemcardIndex(); - if( sio.activeMemcardSlot[mcidx] ) + if( sio.activeMemcardSlot[mcidx] != 0 ) { // Might want to more agressively declare a card's non-existence here. // As non-zero slots always report a failure, and have to read From 006b81aabf1c77ab5c94e3e02a9bbc296541d16b Mon Sep 17 00:00:00 2001 From: Nneeve Date: Fri, 3 Apr 2009 18:49:23 +0000 Subject: [PATCH 15/21] VU: fixed 1-cycle branch delays fixed an issue with FDIV instructions whose operands are vf00 EE: fixed recompilation of SRA and SRL when shift amount is 0 fixed conditional recompilation #defines a bit (as an aside, this fixes a crash in Ratchet and Clank, but the game has other issues) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@895 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iFPUd.cpp | 3 +++ pcsx2/x86/iR5900.h | 11 ++++++++ pcsx2/x86/iVUzerorec.cpp | 27 ++++++++++++++++--- pcsx2/x86/ix86-32/iR5900Arit.cpp | 28 ++++++++++---------- pcsx2/x86/ix86-32/iR5900Branch.cpp | 8 +++--- pcsx2/x86/ix86-32/iR5900Jump.cpp | 4 +-- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 24 ++++++++--------- pcsx2/x86/ix86-32/iR5900Move.cpp | 12 ++++----- pcsx2/x86/ix86-32/iR5900MultDiv.cpp | 16 +++++------ pcsx2/x86/ix86-32/iR5900Shift.cpp | 38 ++++++++++++++++----------- 10 files changed, 106 insertions(+), 65 deletions(-) diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index cbd8ca19e2..7f6221f08c 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -73,6 +73,8 @@ void LoadCW() { iCWstate = 0; } */ +#ifdef FPU_RECOMPILE + //------------------------------------------------------------------ namespace R5900 { namespace Dynarec { @@ -1058,3 +1060,4 @@ FPURECOMPILE_CONSTCODE(RSQRT_S, 
XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); } } } } } +#endif \ No newline at end of file diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 6624cd2342..ff2406db19 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -88,6 +88,17 @@ extern u32 s_nBlockCycles; // cycles of current block recompiling branch = 2; \ } +#define REC_SYS_DEL( f, delreg ) \ + void rec##f( void ) \ + { \ + MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); \ + MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); \ + iFlushCall(FLUSH_EVERYTHING); \ + if( (delreg) > 0 ) _deleteEEreg(delreg, 0); \ + CALLFunc( (uptr)Interp::f ); \ + branch = 2; \ + } + // Used to clear recompiled code blocks during memory/dma write operations. u32 recClearMem(u32 pc); diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index db9ff78fbb..4dea960b87 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -918,20 +918,39 @@ static VuInstruction* getDelayInst(VuInstruction* pInst) // ibeq vi05, vi03 // The ibeq should read the vi05 before the first sqi + //more info: + + // iaddiu vi01, 0, 1 + // ibeq vi01, 0 <- reads vi01 before the iaddiu + + // iaddiu vi01, 0, 1 + // iaddiu vi01, vi01, 1 + // iaddiu vi01, vi01, 1 + // ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1) + + // ilw vi02, addr + // iaddiu vi01, 0, 1 + // ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls + int delay = 1; VuInstruction* pDelayInst = NULL; VuInstruction* pTargetInst = pInst->pPrevInst; - while( 1 ) { // fixme: is 3-cycle delay really maximum? 
+ while( 1 ) { if( pTargetInst != NULL && pTargetInst->info.cycle+delay==pInst->info.cycle && (pTargetInst->regs[0].pipe == VUPIPE_IALU||pTargetInst->regs[0].pipe == VUPIPE_FMAC) && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) - && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff) + && (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff)) && !(pTargetInst->regs[0].VIread&((1<pPrevInst; delay++; + if (delay == 5) //maximum delay is 4 (length of the pipeline) + { + DevCon::WriteLn("supervu: cycle branch delay maximum (4) is reached"); + break; + } } else break; } @@ -2041,9 +2060,9 @@ void VuBaseBlock::AssignVFRegs() _freeXMMreg(free1); _freeXMMreg(free2); } - else if( regs->VIwrite & (1<VIwrite & (1<VIwrite & (1< Date: Fri, 3 Apr 2009 19:09:38 +0000 Subject: [PATCH 16/21] Fixed devel building again, assumably as jake turned the optimizations back on that he didnt want them off. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@896 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 1 + 1 file changed, 1 insertion(+) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 5b376a4972..ba155955e5 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -145,6 +145,7 @@ PrecompiledHeaderFile="$(IntDir)\$(TargetName).pch" WarningLevel="3" SuppressStartupBanner="true" + DebugInformationFormat="0" CompileAs="2" /> Date: Fri, 3 Apr 2009 19:17:40 +0000 Subject: [PATCH 17/21] went a bit mental lol :P git-svn-id: http://pcsx2.googlecode.com/svn/trunk@897 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index ba155955e5..9056cb375f 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -136,6 +136,7 @@ AdditionalIncludeDirectories="" PreprocessorDefinitions="NDEBUG" StringPooling="true" + MinimalRebuild="true" ExceptionHandling="2" SmallerTypeCheck="false" BufferSecurityCheck="false" @@ -145,7 +146,7 @@ PrecompiledHeaderFile="$(IntDir)\$(TargetName).pch" WarningLevel="3" SuppressStartupBanner="true" - DebugInformationFormat="0" + DebugInformationFormat="3" CompileAs="2" /> Date: Fri, 3 Apr 2009 19:30:53 +0000 Subject: [PATCH 18/21] Further fix for crash n burn (videos were invisible) hopefully fixes ATV too, but i dont have it to test. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@898 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 7cc43a2f3f..2413640c88 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -2133,7 +2133,7 @@ int _VIF1chain() u32 *pMem; u32 ret; - if (vif1ch->qwc == 0 && vif1.vifstalled == 0 && vif1.irqoffset == 0) + if (vif1ch->qwc == 0) { vif1.inprogress = 0; return 0; @@ -2233,7 +2233,7 @@ __forceinline void vif1SetupTransfer() } } - + vif1.irqoffset = 0; vif1.done |= hwDmacSrcChainWithStack(vif1ch, id); if ((vif1ch->chcr & 0x80) && (vif1ptag[0] >> 31)) //Check TIE bit of CHCR and IRQ bit of tag @@ -2407,7 +2407,7 @@ void vif1Write32(u32 mem, u32 value) // just stoppin the VIF (linuz). vif1Regs->stat |= VIF1_STAT_VSS; vif1Regs->stat &= ~VIF1_STAT_VPS; - vif1.inprogress = 0; + cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's vif1.vifstalled = 1; } if (value & 0x8) From fdcff573652e555dacf5854d26a2b2344d04de53 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 3 Apr 2009 22:51:51 +0000 Subject: [PATCH 19/21] Reworked the unpacking code in Vif.cpp. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@900 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 255 ++++++++++++++++++++++++++++---------------------- 1 file changed, 141 insertions(+), 114 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 6d9ec34b9c..2fd308ce78 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -18,54 +18,47 @@ #include "PrecompiledHeader.h" #include +#include #include "Common.h" #include "VUmicro.h" - #include "Vif.h" #include "VifDma.h" -#include - VIFregisters *_vifRegs; +u32* _vifRow = NULL, *_vifCol = NULL; u32* _vifMaskRegs = NULL; +vifStruct *_vif; + PCSX2_ALIGNED16(u32 g_vifRow0[4]); PCSX2_ALIGNED16(u32 g_vifCol0[4]); PCSX2_ALIGNED16(u32 g_vifRow1[4]); PCSX2_ALIGNED16(u32 g_vifCol1[4]); -u32* _vifRow = NULL, *_vifCol = NULL; -vifStruct *_vif; - -static int n; - -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) - -static int cycles; +//static int cycles; extern int g_vifCycles; u16 vifqwc = 0; -u32 mfifodmairq = 0; +bool mfifodmairq = FALSE; + +enum UnpackOffset +{ + OFFSET_X = 0, + OFFSET_Y, + OFFSET_Z, + OFFSET_W +}; + +#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) __forceinline static int _limit(int a, int max) { return (a > max) ? 
max : a; } - -#define _UNPACKpart( offnum, func ) \ - if ( ( size > 0 ) && ( _vifRegs->offset == offnum ) ) { \ - func; \ - size--; \ - _vifRegs->offset++; \ - } - -#define _UNPACKpart_nosize( offnum, func ) \ - if ( ( _vifRegs->offset == offnum ) ) { \ - func; \ - _vifRegs->offset++; \ - } - + static __releaseinline void writeX(u32 *dest, u32 data) { + int n; + if (_vifRegs->code & 0x10000000) { switch (_vif->cl) @@ -133,6 +126,8 @@ static __releaseinline void writeX(u32 *dest, u32 data) static __releaseinline void writeY(u32 *dest, u32 data) { + int n; + if (_vifRegs->code & 0x10000000) { switch (_vif->cl) @@ -200,6 +195,8 @@ static __releaseinline void writeY(u32 *dest, u32 data) static __releaseinline void writeZ(u32 *dest, u32 data) { + int n; + if (_vifRegs->code & 0x10000000) { switch (_vif->cl) @@ -267,6 +264,8 @@ static __releaseinline void writeZ(u32 *dest, u32 data) static __releaseinline void writeW(u32 *dest, u32 data) { + int n; + if (_vifRegs->code & 0x10000000) { switch (_vif->cl) @@ -332,61 +331,89 @@ static __releaseinline void writeW(u32 *dest, u32 data) // VIF_LOG("writeW %8.8x : Mode %d, r3 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r3,data); } +static __forceinline bool __fastcall _UNPACKpart(u32 offnum, u32 *x, u32 y) +{ + if (_vifRegs->offset == offnum) + { + switch (offnum) + { + case OFFSET_X: + writeX(x,y); + break; + case OFFSET_Y: + writeY(x,y); + break; + case OFFSET_Z: + writeZ(x,y); + break; + case OFFSET_W: + writeW(x,y); + break; + default: + break; + } + _vifRegs->offset++; + + return TRUE; + } + return FALSE; +} + void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) { - _UNPACKpart(0, writeX(dest++, *data)); - _UNPACKpart(1, writeY(dest++, *data)); - _UNPACKpart(2, writeZ(dest++, *data)); - _UNPACKpart(3, writeW(dest , *data)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *data)) 
size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *data)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_S_16s(u32 *dest, u32 *data, int size) { s16 *sdata = (s16*)data; - _UNPACKpart(0, writeX(dest++, *sdata)); - _UNPACKpart(1, writeY(dest++, *sdata)); - _UNPACKpart(2, writeZ(dest++, *sdata)); - _UNPACKpart(3, writeW(dest , *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *sdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_S_16u(u32 *dest, u32 *data, int size) { const u16 *sdata = (u16*)data; - _UNPACKpart(0, writeX(dest++, *sdata)); - _UNPACKpart(1, writeY(dest++, *sdata)); - _UNPACKpart(2, writeZ(dest++, *sdata)); - _UNPACKpart(3, writeW(dest , *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *sdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_S_8s(u32 *dest, u32 *data, int size) { s8 *cdata = (s8*)data; - _UNPACKpart(0, writeX(dest++, *cdata)); - _UNPACKpart(1, writeY(dest++, *cdata)); - _UNPACKpart(2, writeZ(dest++, *cdata)); - _UNPACKpart(3, writeW(dest , *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *cdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_S_8u(u32 *dest, u32 *data, int size) { u8 *cdata = (u8*)data; - _UNPACKpart(0, writeX(dest++, *cdata)); - _UNPACKpart(1, 
writeY(dest++, *cdata)); - _UNPACKpart(2, writeZ(dest++, *cdata)); - _UNPACKpart(3, writeW(dest , *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata))size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *cdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V2_32(u32 *dest, u32 *data, int size) { - _UNPACKpart(0, writeX(dest++, *data++)); - _UNPACKpart(1, writeY(dest++, *data--)); - _UNPACKpart_nosize(2, writeZ(dest++, *data)); - _UNPACKpart_nosize(3, writeW(dest , 0)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data--)) size--; + _UNPACKpart(OFFSET_Z, dest++, *data); + _UNPACKpart(OFFSET_W, dest, 0); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } @@ -394,148 +421,148 @@ void __fastcall UNPACK_V2_32(u32 *dest, u32 *data, int size) void __fastcall UNPACK_V2_16s(u32 *dest, u32 *data, int size) { s16 *sdata = (s16*)data; - _UNPACKpart(0, writeX(dest++, *sdata++)); - _UNPACKpart(1, writeY(dest++, *sdata--)); - _UNPACKpart_nosize(2, writeZ(dest++, *sdata++)); - _UNPACKpart_nosize(3, writeW(dest , *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata--)) size--; + _UNPACKpart(OFFSET_Z, dest++, *sdata++); + _UNPACKpart(OFFSET_W, dest , *sdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V2_16u(u32 *dest, u32 *data, int size) { u16 *sdata = (u16*)data; - _UNPACKpart(0, writeX(dest++, *sdata++)); - _UNPACKpart(1, writeY(dest++, *sdata--)); - _UNPACKpart_nosize(2, writeZ(dest++, *sdata++)); - _UNPACKpart_nosize(3, writeW(dest , *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)); + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata--)); + _UNPACKpart(OFFSET_Z, dest++, 
*sdata++); + _UNPACKpart(OFFSET_W, dest , *sdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V2_8s(u32 *dest, u32 *data, int size) { s8 *cdata = (s8*)data; - _UNPACKpart(0, writeX(dest++, *cdata++)); - _UNPACKpart(1, writeY(dest++, *cdata--)); - _UNPACKpart_nosize(2, writeZ(dest++, *cdata++)); - _UNPACKpart_nosize(3, writeW(dest , *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata--)) size--; + _UNPACKpart(OFFSET_Z, dest++, *cdata++); + _UNPACKpart(OFFSET_W, dest , *cdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V2_8u(u32 *dest, u32 *data, int size) { u8 *cdata = (u8*)data; - _UNPACKpart(0, writeX(dest++, *cdata++)); - _UNPACKpart(1, writeY(dest++, *cdata--)); - _UNPACKpart_nosize(2, writeZ(dest++, *cdata++)); - _UNPACKpart_nosize(3, writeW(dest , *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata--)) size--; + _UNPACKpart(OFFSET_Z, dest++, *cdata++); + _UNPACKpart(OFFSET_W, dest , *cdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V3_32(u32 *dest, u32 *data, int size) { - _UNPACKpart(0, writeX(dest++, *data++);); - _UNPACKpart(1, writeY(dest++, *data++);); - _UNPACKpart(2, writeZ(dest++, *data++);); - _UNPACKpart_nosize(3, writeW(dest, *data);); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *data++)) size--; + _UNPACKpart(OFFSET_W, dest, *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V3_16s(u32 *dest, u32 *data, int size) { s16 *sdata = (s16*)data; - _UNPACKpart(0, writeX(dest++, *sdata++)); - _UNPACKpart(1, writeY(dest++, *sdata++)); - _UNPACKpart(2, writeZ(dest++, *sdata++)); - _UNPACKpart_nosize(3, writeW(dest, *sdata)); + if 
(size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; + _UNPACKpart(OFFSET_W, dest, *sdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V3_16u(u32 *dest, u32 *data, int size) { u16 *sdata = (u16*)data; - _UNPACKpart(0, writeX(dest++, *sdata++)); - _UNPACKpart(1, writeY(dest++, *sdata++)); - _UNPACKpart(2, writeZ(dest++, *sdata++)); - _UNPACKpart_nosize(3, writeW(dest, *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; + _UNPACKpart(OFFSET_W, dest, *sdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V3_8s(u32 *dest, u32 *data, int size) { s8 *cdata = (s8*)data; - _UNPACKpart(0, writeX(dest++, *cdata++)); - _UNPACKpart(1, writeY(dest++, *cdata++)); - _UNPACKpart(2, writeZ(dest++, *cdata++)); - _UNPACKpart_nosize(3, writeW(dest, *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; + _UNPACKpart(OFFSET_W, dest, *cdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V3_8u(u32 *dest, u32 *data, int size) { u8 *cdata = (u8*)data; - _UNPACKpart(0, writeX(dest++, *cdata++)); - _UNPACKpart(1, writeY(dest++, *cdata++)); - _UNPACKpart(2, writeZ(dest++, *cdata++)); - _UNPACKpart_nosize(3, writeW(dest, *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; + _UNPACKpart(OFFSET_W, dest, *cdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall 
UNPACK_V4_32(u32 *dest, u32 *data , int size) { - _UNPACKpart(0, writeX(dest++, *data++)); - _UNPACKpart(1, writeY(dest++, *data++)); - _UNPACKpart(2, writeZ(dest++, *data++)); - _UNPACKpart(3, writeW(dest , *data)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *data++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *data)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_16s(u32 *dest, u32 *data, int size) { s16 *sdata = (s16*)data; - _UNPACKpart(0, writeX(dest++, *sdata++)); - _UNPACKpart(1, writeY(dest++, *sdata++)); - _UNPACKpart(2, writeZ(dest++, *sdata++)); - _UNPACKpart(3, writeW(dest , *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *sdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_16u(u32 *dest, u32 *data, int size) { u16 *sdata = (u16*)data; - _UNPACKpart(0, writeX(dest++, *sdata++)); - _UNPACKpart(1, writeY(dest++, *sdata++)); - _UNPACKpart(2, writeZ(dest++, *sdata++)); - _UNPACKpart(3, writeW(dest , *sdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *sdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_8s(u32 *dest, u32 *data, int size) { s8 *cdata = (s8*)data; - _UNPACKpart(0, writeX(dest++, *cdata++)); - _UNPACKpart(1, writeY(dest++, *cdata++)); - _UNPACKpart(2, writeZ(dest++, *cdata++)); - _UNPACKpart(3, writeW(dest , *cdata)); + if (size > 0) if 
(_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *cdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_8u(u32 *dest, u32 *data, int size) { u8 *cdata = (u8*)data; - _UNPACKpart(0, writeX(dest++, *cdata++)); - _UNPACKpart(1, writeY(dest++, *cdata++)); - _UNPACKpart(2, writeZ(dest++, *cdata++)); - _UNPACKpart(3, writeW(dest , *cdata)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *cdata)) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - _UNPACKpart(0, writeX(dest++, (*data & 0x001f) << 3);); - _UNPACKpart(1, writeY(dest++, (*data & 0x03e0) >> 2);); - _UNPACKpart(2, writeZ(dest++, (*data & 0x7c00) >> 7);); - _UNPACKpart(3, writeW(dest , (*data & 0x8000) >> 8);); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, ((*data & 0x001f) << 3))) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, ((*data & 0x03e0) >> 2))) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, ((*data & 0x7c00) >> 7))) size--; + if (size > 0) if (_UNPACKpart(OFFSET_W, dest , ((*data & 0x8000) >> 8))) size--; if (_vifRegs->offset == 4) _vifRegs->offset = 0; } @@ -648,7 +675,7 @@ void mfifoVIF1transfer(int qwc) return; } - mfifodmairq = 0; //Clear any previous TIE interrupt + mfifodmairq = FALSE; //Clear any previous TIE interrupt if (vif1ch->qwc == 0) { @@ -669,7 +696,7 @@ void mfifoVIF1transfer(int qwc) id = (ptag[0] >> 28) & 0x7; vif1ch->qwc = (ptag[0] & 0xffff); vif1ch->madr = ptag[1]; - cycles += 2; + //cycles += 2; vif1ch->chcr = (vif1ch->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); 
@@ -715,7 +742,7 @@ void mfifoVIF1transfer(int qwc) { VIF_LOG("dmaIrq Set"); vif1.done = 1; - mfifodmairq = 1; //Let the handler know we have prematurely ended MFIFO + mfifodmairq = TRUE; //Let the handler know we have prematurely ended MFIFO } } @@ -772,7 +799,7 @@ void vifMFIFOInterrupt() //On a TIE break we do not clear the MFIFO (Art of Fighting) //If we dont clear it on MFIFO end, Tekken Tag breaks, understandably (Refraction) - if (mfifodmairq == 0) vifqwc = 0; + if (!mfifodmairq) vifqwc = 0; vif1.done = 1; g_vifCycles = 0; From 12cba61302ef9f17761ab9357709691ae289cf01 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 3 Apr 2009 23:18:11 +0000 Subject: [PATCH 20/21] Fix typo in r900. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@902 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 2fd308ce78..0f778f212a 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -431,8 +431,8 @@ void __fastcall UNPACK_V2_16s(u32 *dest, u32 *data, int size) void __fastcall UNPACK_V2_16u(u32 *dest, u32 *data, int size) { u16 *sdata = (u16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)); - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata--)); + if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; + if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata--)) size--; _UNPACKpart(OFFSET_Z, dest++, *sdata++); _UNPACKpart(OFFSET_W, dest , *sdata); if (_vifRegs->offset == 4) _vifRegs->offset = 0; From ebcedccf23446f3679d41b3083bd65e8423c1d33 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 4 Apr 2009 06:20:48 +0000 Subject: [PATCH 21/21] pcsx2: - removed some obsolete 'iCWstate' code. microVU: - implemented more rec first pass stuff for the lower instructions. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@903 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iCore.h | 2 +- pcsx2/x86/iFPU.cpp | 33 ------------------------- pcsx2/x86/iFPU.h | 3 --- pcsx2/x86/iFPUd.cpp | 35 +-------------------------- pcsx2/x86/iR5900.h | 4 --- pcsx2/x86/iVU0micro.cpp | 3 +-- pcsx2/x86/iVU1micro.cpp | 1 - pcsx2/x86/ix86-32/iCore-32.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 8 +----- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 2 -- pcsx2/x86/microVU_Alloc.h | 6 +++-- pcsx2/x86/microVU_Analyze.inl | 26 +++++++++++++++++--- pcsx2/x86/microVU_Compile.inl | 4 ++- pcsx2/x86/microVU_Lower.inl | 32 ++++++++++++------------ pcsx2/x86/microVU_Misc.h | 2 +- 15 files changed, 52 insertions(+), 111 deletions(-) diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 8191ddcfc2..3397805068 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -362,7 +362,7 @@ void _recMove128MtoRmOffset(u32 offset, u32 from); extern int _signExtendGPRtoMMX(x86MMXRegType to, u32 gprreg, int shift); extern _mmxregs mmxregs[MMXREGS], s_saveMMXregs[MMXREGS]; -extern u16 x86FpuState, iCWstate; +extern u16 x86FpuState; extern void iDumpRegisters(u32 startpc, u32 temp); diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index 601159d52c..27dfd1471f 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -23,39 +23,6 @@ #include "iR5900.h" #include "iFPU.h" -//------------------------------------------------------------------ -// Misc... 
-//------------------------------------------------------------------ -//static u32 _mxcsr = 0x7F80; -//static u32 _mxcsrs; -static u32 fpucw = 0x007f; -static u32 fpucws = 0; - -void SaveCW(int type) { - if (iCWstate & type) return; - - if (type == 2) { -// SSE_STMXCSR((uptr)&_mxcsrs); -// SSE_LDMXCSR((uptr)&_mxcsr); - } else { - FNSTCW( (uptr)&fpucws ); - FLDCW( (uptr)&fpucw ); - } - iCWstate|= type; -} - -void LoadCW() { - if (iCWstate == 0) return; - - if (iCWstate & 2) { - //SSE_LDMXCSR((uptr)&_mxcsrs); - } - if (iCWstate & 1) { - FLDCW( (uptr)&fpucws ); - } - iCWstate = 0; -} - //------------------------------------------------------------------ namespace R5900 { namespace Dynarec { diff --git a/pcsx2/x86/iFPU.h b/pcsx2/x86/iFPU.h index d6b566999f..f813612eaa 100644 --- a/pcsx2/x86/iFPU.h +++ b/pcsx2/x86/iFPU.h @@ -22,9 +22,6 @@ namespace R5900 { namespace Dynarec { - void SaveCW(); - void LoadCW(); - namespace OpcodeImpl { namespace COP1 { diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index 7f6221f08c..ec7b489b76 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -39,40 +39,7 @@ //set I&D flags. also impacts other aspects of DIV/R/SQRT correctness #define FPU_FLAGS_ID 1 - -//------------------------------------------------------------------ -// Misc... 
-//------------------------------------------------------------------ -//static u32 _mxcsr = 0x7F80; -//static u32 _mxcsrs; -/*static u32 fpucw = 0x007f; -static u32 fpucws = 0; - -void SaveCW(int type) { - if (iCWstate & type) return; - - if (type == 2) { -// SSE_STMXCSR((uptr)&_mxcsrs); -// SSE_LDMXCSR((uptr)&_mxcsr); - } else { - FNSTCW( (uptr)&fpucws ); - FLDCW( (uptr)&fpucw ); - } - iCWstate|= type; -} - -void LoadCW() { - if (iCWstate == 0) return; - - if (iCWstate & 2) { - //SSE_LDMXCSR((uptr)&_mxcsrs); - } - if (iCWstate & 1) { - FLDCW( (uptr)&fpucws ); - } - iCWstate = 0; -} -*/ + #ifdef FPU_RECOMPILE //------------------------------------------------------------------ diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index ff2406db19..15fc5ef33b 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -117,10 +117,6 @@ void iFlushCall(int flushtype); void recBranchCall( void (*func)() ); void recCall( void (*func)(), int delreg ); -// these are defined in iFPU.cpp -void LoadCW(); -void SaveCW(int type); - extern void recExecute(); // same as recCpu.Execute(), but faster (can be inline'd) namespace R5900{ diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index b3c9887908..4b70381294 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -44,9 +44,8 @@ namespace VU0micro { SuperVUReset(0); - // these shouldn't be needed, but shouldn't hurt anything either. + // this shouldn't be needed, but shouldn't hurt anything either. x86FpuState = FPU_STATE; - iCWstate = 0; } static void recStep() diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index c56eae133c..2c4cc00bb4 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -85,7 +85,6 @@ namespace VU1micro // these shouldn't be needed, but shouldn't hurt anything either. 
x86FpuState = FPU_STATE; - iCWstate = 0; } static void recStep() diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index d3557228e0..0a70906ba8 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -33,7 +33,7 @@ using namespace std; extern u32 g_psxConstRegs[32]; -u16 x86FpuState, iCWstate; +u16 x86FpuState; u16 g_mmxAllocCounter = 0; // X86 caching diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index b38fa22580..06f3b62b92 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -92,7 +92,7 @@ static u32 s_nHasDelay = 0; // save states for branches GPR_reg64 s_saveConstRegs[32]; -static u16 s_savex86FpuState, s_saveiCWstate; +static u16 s_savex86FpuState; static u32 s_saveHasConstReg = 0, s_saveFlushedConstReg = 0, s_saveRegHasLive1 = 0, s_saveRegHasSignExt = 0; static EEINST* s_psaveInstInfo = NULL; @@ -587,7 +587,6 @@ void recResetEE( void ) recPtr = recMem; recStackPtr = recStack; x86FpuState = FPU_STATE; - iCWstate = 0; branch = 0; SetCPUState(Config.sseMXCSR, Config.sseVUMXCSR); @@ -987,7 +986,6 @@ void SetBranchImm( u32 imm ) void SaveBranchState() { s_savex86FpuState = x86FpuState; - s_saveiCWstate = iCWstate; s_savenBlockCycles = s_nBlockCycles; memcpy(s_saveConstRegs, g_cpuConstRegs, sizeof(g_cpuConstRegs)); s_saveHasConstReg = g_cpuHasConstReg; @@ -1004,7 +1002,6 @@ void SaveBranchState() void LoadBranchState() { x86FpuState = s_savex86FpuState; - iCWstate = s_saveiCWstate; s_nBlockCycles = s_savenBlockCycles; memcpy(g_cpuConstRegs, s_saveConstRegs, sizeof(g_cpuConstRegs)); @@ -1036,8 +1033,6 @@ void iFlushCall(int flushtype) if( flushtype & FLUSH_CACHED_REGS ) _flushConstRegs(); - LoadCW(); - if (x86FpuState==MMX_STATE) { if (cpucaps.has3DNOWInstructionExtensions) FEMMS(); else EMMS(); @@ -1406,7 +1401,6 @@ void recRecompile( const u32 startpc ) s_nBlockCycles = 0; pc = startpc; x86FpuState = FPU_STATE; - iCWstate = 0; g_cpuHasConstReg 
= g_cpuFlushedConstReg = 1; g_cpuPrevRegHasLive1 = g_cpuRegHasLive1 = 0xffffffff; g_cpuPrevRegHasSignExt = g_cpuRegHasSignExt = 0; diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 37ee17bcfa..83925362c3 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -230,8 +230,6 @@ int recSetMemLocation(int regs, int imm, int mmreg, int msize, int j32) if ( imm != 0 ) ADD32ItoR( ECX, imm ); - LoadCW(); - #ifdef _DEBUG //CALLFunc((uptr)testaddrs); #endif diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 4361ea0e74..c539eb1297 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -40,6 +40,8 @@ struct microTempRegInfo { u8 VFreg[2]; // Index of the VF reg u8 VI; // Holds cycle info for Id u8 VIreg; // Index of the VI reg + u8 q; // Holds cycle info for Q reg + u8 p; // Holds cycle info for P reg }; template @@ -49,9 +51,9 @@ struct microAllocInfo { u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time. 
- u32 curPC; // Current PC + u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block - u32 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) + u32 curPC; // Current PC u32 info[pSize]; // bit 00 = Lower Instruction is NOP // bit 01 // bit 02 diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 23050cd585..f35299ab28 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -27,8 +27,8 @@ // FMAC1 - Normal FMAC Opcodes //------------------------------------------------------------------ -#define aReg(x) mVUallocInfo.regs.VF[x] -#define bReg(x) mVUallocInfo.regsTemp.VFreg[0] = x; mVUallocInfo.regsTemp.VF[0] +#define aReg(x) mVUregs.VF[x] +#define bReg(x) mVUregsTemp.VFreg[0] = x; mVUregsTemp.VF[0] #define aMax(x, y) ((x > y) ? x : y) #define analyzeReg1(reg) { \ @@ -117,10 +117,30 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { } \ } -microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf) { +#define analyzeQreg(x) { mVUregsTemp.q = x; mVUstall = aMax(mVUstall, mVUregs.q); } +#define analyzePreg(x) { mVUregsTemp.p = x; mVUstall = aMax(mVUstall, ((mVUregs.p) ? 
(mVUregs.p - 1) : 0)); } + +microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { microVU* mVU = mVUx; analyzeReg5(Fs, Fsf); analyzeReg5(Ft, Ftf); + analyzeQreg(xCycles); +} + +//------------------------------------------------------------------ +// EFU - EFU Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeEFU1(int Fs, int Fsf, u8 xCycles) { + microVU* mVU = mVUx; + analyzeReg5(Fs, Fsf); + analyzePreg(xCycles); +} + +microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { + microVU* mVU = mVUx; + analyzeReg1(Fs); + analyzePreg(xCycles); } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index e07dff38e8..49c517de5e 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -50,8 +50,10 @@ microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg; - mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg; + mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? 
(aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg); mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI; + mVUregs.q = mVUregsTemp.q; + mVUregs.p = mVUregsTemp.p; } microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 7d6d83a51b..8e7859e033 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -41,7 +41,7 @@ microVUf(void) mVU_DIV() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_); } + if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 7); } else { u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); @@ -76,7 +76,7 @@ microVUf(void) mVU_DIV() { microVUf(void) mVU_SQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_); } + if (!recPass) { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_, 7); } else { u8 *ajmp; getReg5(xmmFt, _Ft_, _Ftf_); @@ -93,7 +93,7 @@ microVUf(void) mVU_SQRT() { microVUf(void) mVU_RSQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_); } + if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 13); } else { u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); @@ -158,7 +158,7 @@ microVUt(void) mVU_EATAN_() { microVUf(void) mVU_EATAN() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 54); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -174,7 +174,7 @@ microVUf(void) mVU_EATAN() { microVUf(void) mVU_EATANxy() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 54); } else { getReg6(xmmFt, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01); @@ -191,7 +191,7 @@ microVUf(void) mVU_EATANxy() { microVUf(void) mVU_EATANxz() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 54); } else { getReg6(xmmFt, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02); @@ -215,7 +215,7 @@ microVUf(void) mVU_EATANxz() { microVUf(void) mVU_EEXP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 44); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -263,7 +263,7 @@ microVUt(void) mVU_sumXYZ() { microVUf(void) mVU_ELENG() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 18); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -275,7 +275,7 @@ microVUf(void) mVU_ELENG() { microVUf(void) mVU_ERCPR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -289,7 +289,7 @@ microVUf(void) mVU_ERCPR() { microVUf(void) mVU_ERLENG() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 24); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -304,7 +304,7 @@ microVUf(void) mVU_ERLENG() { microVUf(void) mVU_ERSADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 18); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -319,7 +319,7 @@ microVUf(void) mVU_ERSADD() { microVUf(void) mVU_ERSQRT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 18); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -333,7 +333,7 @@ microVUf(void) mVU_ERSQRT() { microVUf(void) mVU_ESADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 11); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -351,7 +351,7 @@ microVUf(void) mVU_ESADD() { microVUf(void) mVU_ESIN() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 29); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -377,7 +377,7 @@ microVUf(void) mVU_ESIN() { microVUf(void) mVU_ESQRT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } else { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -388,7 +388,7 @@ microVUf(void) mVU_ESQRT() { microVUf(void) mVU_ESUM() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeEFU2(_Fs_, 12); } else { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 28231ca34e..ce9272f201 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -99,7 +99,7 @@ declareAllVariables #define _Mbit_ (1<<29) #define _Dbit_ (1<<28) #define _Tbit_ (1<<27) -#define _MDTbit_ ( _Mbit_ | _Dbit_ | _Tbit_ ) +#define _MDTbit_ 0 //( _Mbit_ | _Dbit_ | _Tbit_ ) // ToDo: Implement this stuff... #define getVUmem(x) (((vuIndex == 1) ? 
(x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16) #define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))