From 2356f75396ce063b91cdd63a28e5f7bf7d55921b Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 13 Oct 2008 06:09:46 +0000 Subject: [PATCH] fixed some bad VU1 opcodes (some opcodes didn't even return the correct register, so the correct result would end up in the wrong reg lol) also cleaned up some DMA code a tiny bit, and added FreezeXMMRegs() where it was needed. git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@194 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/PsxDma.c | 2 + pcsx2/Sif.c | 38 +++--------------- pcsx2/x86/iVUmicro.c | 94 +++++++++++++------------------------------- 3 files changed, 34 insertions(+), 100 deletions(-) diff --git a/pcsx2/PsxDma.c b/pcsx2/PsxDma.c index caf6ac67b1..c5656dafc8 100644 --- a/pcsx2/PsxDma.c +++ b/pcsx2/PsxDma.c @@ -191,10 +191,12 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) { iopsifbusy[1] = 1; psHu32(0x1000F240) |= 0x4000; if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) { + FreezeXMMRegs(1); SIF1Dma(); psHu32(0x1000F240) &= ~0x40; psHu32(0x1000F240) &= ~0x100; psHu32(0x1000F240) &= ~0x4000; + FreezeXMMRegs(0); } } diff --git a/pcsx2/Sif.c b/pcsx2/Sif.c index 2717de4496..d758ca276f 100644 --- a/pcsx2/Sif.c +++ b/pcsx2/Sif.c @@ -368,13 +368,10 @@ _inline void SIF1Dma() notDone = 1; do { - - if(eesifbusy[1] == 1) // If EE SIF1 is enabled { - if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) { // STS == fromSIF1 - SysPrintf("SIF1 stall control\n"); - } + + if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) SysPrintf("SIF1 stall control\n"); // STS == fromSIF1 if(sif1dma->qwc == 0) // If there's no more to transfer { @@ -383,9 +380,7 @@ _inline void SIF1Dma() // Stop & signal interrupts on EE //sif1dma->chcr &= ~0x100; //hwDmacIrq(6); -#ifdef SIF_LOG SIF_LOG("EE SIF1 End %x\n", sif1.end); -#endif eesifbusy[1] = 0; notDone = 0; INT(6, cycles*BIAS); @@ -406,48 +401,38 @@ _inline void SIF1Dma() } sif1.chain = 1; - id = (ptag[0] >> 28) & 0x7; + id = (ptag[0] >> 28) & 0x7; switch(id) { case 0: // refe -#ifdef SIF_LOG SIF_LOG(" REFE %08X\n", ptag[1]); -#endif sif1.end = 1; sif1dma->madr = ptag[1]; sif1dma->tadr += 16; break; case 1: // cnt -#ifdef SIF_LOG SIF_LOG(" CNT\n"); -#endif sif1dma->madr = sif1dma->tadr + 16; sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4); break; case 2: // next -#ifdef SIF_LOG SIF_LOG(" NEXT %08X\n", ptag[1]); -#endif sif1dma->madr = sif1dma->tadr + 16; sif1dma->tadr = ptag[1]; break; case 3: // ref case 4: // refs -#ifdef SIF_LOG SIF_LOG(" REF %08X\n", ptag[1]); -#endif sif1dma->madr = ptag[1]; sif1dma->tadr += 16; break; case 7: // end -#ifdef SIF_LOG SIF_LOG(" END\n"); -#endif sif1.end = 1; sif1dma->madr = sif1dma->tadr + 16; sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4); @@ -468,7 +453,6 @@ _inline void SIF1Dma() u32 *data; //notDone = 1; - _dmaGetAddr(sif1dma, data, sif1dma->madr, 6); if(qwTransfer > (FIFO_SIF1_W-sif1.fifoSize)/4) // Copy part of sif1dma into FIFO @@ -492,12 +476,9 @@ _inline void SIF1Dma() {*/ int readSize = size; - if(readSize > sif1.fifoSize) - readSize = sif1.fifoSize; + if(readSize > sif1.fifoSize) readSize = sif1.fifoSize; -#ifdef SIF_LOG SIF_LOG(" IOP SIF doing transfer %04X to %08X\n", readSize, HW_DMA10_MADR); -#endif SIF1read((u32*)PSXM(HW_DMA10_MADR), readSize); psxCpu->Clear(HW_DMA10_MADR, readSize); @@ -513,9 +494,7 @@ _inline void SIF1Dma() if(sif1.tagMode & 0x80) // Stop on tag IRQ { // Tag interrupt -#ifdef SIF_LOG SIF_LOG(" IOP SIF interrupt\n"); -#endif //HW_DMA10_CHCR &= ~0x01000000; //reset TR flag //psxDmaInterrupt2(3); iopsifbusy[1] = 0; @@ -527,9 +506,7 @@ _inline void SIF1Dma() else if(sif1.tagMode & 0x40) // Stop on tag END { // End tag. -#ifdef SIF_LOG SIF_LOG(" IOP SIF end\n"); -#endif //HW_DMA10_CHCR &= ~0x01000000; //reset TR flag //psxDmaInterrupt2(3); iopsifbusy[1] = 0; @@ -541,12 +518,8 @@ _inline void SIF1Dma() else if(sif1.fifoSize >= 4) // Read a tag { struct sifData d; - SIF1read((u32*)&d, 4); - -#ifdef SIF_LOG SIF_LOG(" IOP SIF dest chain tag madr:%08X wc:%04X id:%X irq:%d\n", d.data & 0xffffff, d.words, (d.data>>28)&7, (d.data>>31)&1); -#endif HW_DMA10_MADR = d.data & 0xffffff; sif1.counter = d.words; sif1.tagMode = (d.data >> 24) & 0xFF; @@ -554,8 +527,7 @@ _inline void SIF1Dma() } } } - }while(notDone); - + } while (notDone); } _inline void sif0Interrupt() { diff --git a/pcsx2/x86/iVUmicro.c b/pcsx2/x86/iVUmicro.c index e7a2bcb74d..1054f993f1 100644 --- a/pcsx2/x86/iVUmicro.c +++ b/pcsx2/x86/iVUmicro.c @@ -5228,22 +5228,25 @@ void recVUMI_WAITP(VURegs *VU, int info) } // in all EFU insts, EEREC_D is a temp reg -void vuSqSumXYZ(int regd, int regs, int regtemp) +void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2 { + //SysPrintf("VU: SUMXYZ\n"); if( cpucaps.hasStreamingSIMD4Extensions ) { SSE_MOVAPS_XMM_to_XMM(regd, regs); + if (CHECK_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0x8); SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71); } else { SSE_MOVAPS_XMM_to_XMM(regtemp, regs); + if (CHECK_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0x8); SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2 if( cpucaps.hasStreamingSIMD3Extensions ) { SSE3_HADDPS_XMM_to_XMM(regd, regtemp); - SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x + SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2 + SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z } else { SSE_MOVSS_XMM_to_XMM(regd, regtemp); @@ -5252,83 +5255,42 @@ void vuSqSumXYZ(int regd, int regs, int regtemp) SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2 //SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6); + + /* TODO: check if this code does the same thing as above code + SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + */ } } - - //SysPrintf("SUMXYZ\n"); } void recVUMI_ESADD( VURegs *VU, int info) { + //SysPrintf("VU: ESADD\n"); assert( VU == &VU1 ); - //SysPrintf("ESADD\n"); if( EEREC_TEMP == EEREC_D ) { // special code to reset P (don't know if this is still useful!) SysPrintf("ESADD: Resetting P reg!!!\n"); MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0); return; } - if( cpucaps.hasStreamingSIMD4Extensions ) - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71); - } - else - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x - } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - - vuFloat2(EEREC_TEMP, EEREC_D, 0x8); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); + if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); } +const static PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; + void recVUMI_ERSADD( VURegs *VU, int info ) { + //SysPrintf("VU: ERSADD\n"); assert( VU == &VU1 ); - // almost same as vuSqSumXYZ - - if( cpucaps.hasStreamingSIMD4Extensions ) - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71); - } - else - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x - } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); - + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); // don't use RCPSS (very bad precision) + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); - vuFloat2(EEREC_TEMP, EEREC_D, 0x8); - + if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); } @@ -5338,9 +5300,8 @@ void recVUMI_ELENG( VURegs *VU, int info ) assert( VU == &VU1 ); vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); } void recVUMI_ERLENG( VURegs *VU, int info ) @@ -5349,9 +5310,8 @@ void recVUMI_ERLENG( VURegs *VU, int info ) assert( VU == &VU1 ); vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // sqrt(x^2 + y^2 + z^2) - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // d <- sqrt(x^2 + y^2 + z^2) - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // temp <- 1 + SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2) + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2) if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);