// Patch overview (unified diff touching three files; text before the first "diff --git" header is commentary only).
//
// pcsx2/PsxDma.c, psxDma10():
//   Inside the `eesifbusy[1] == 1 && iopsifbusy[1] == 1` branch, the SIF1Dma() call and the three
//   psHu32(0x1000F240) bit-clears are now bracketed by FreezeXMMRegs(1) ... FreezeXMMRegs(0).
//
// pcsx2/Sif.c, SIF1Dma():
//   - The "SIF1 stall control" SysPrintf is folded into a single-line `if`.
//   - Every `#ifdef SIF_LOG` / `#endif` pair around a SIF_LOG(...) call in the shown hunks is removed,
//     so those calls are no longer preprocessor-guarded.
//     NOTE(review): presumably SIF_LOG is defined (possibly as a no-op) in every build configuration -- confirm.
//   - Remaining changes in this file are whitespace/layout only (blank lines, `} while (notDone);`).
//
// pcsx2/x86/iVUmicro.c:
//   - vuSqSumXYZ(): adds `if (CHECK_EXTRA_OVERFLOW) vuFloat2(..., 0x8)` on the copied source register before
//     the squares are formed, updates comments, and moves the commented-out SysPrintf to the top.
//     NOTE(review): the mask passed to vuFloat2 is 0x8 while the result combines x, y and z -- confirm that
//     this mask covers every component that feeds the sum.
//   - recVUMI_ESADD() / recVUMI_ERSADD(): the inlined sum-of-squares sequences are replaced by a call to
//     vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); the trailing vuFloat2 call is replaced by
//     `if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(..., (uptr)g_maxvals)`. ESADD now stores EEREC_D to REG_P.
//     NOTE(review): in the removed ESADD code the non-SSE4 paths accumulate into EEREC_D but EEREC_TEMP is
//     what gets stored -- looks like the old code wrote the wrong register there; confirm this is the intent of the change.
//   - Adds the 16-byte-aligned constant VU_ONE (first word 0x3f800000), loaded as the dividend in ERSADD and
//     ERLENG in place of VU->VF[0].UL[3].
//   - recVUMI_ELENG() / recVUMI_ERLENG(): SQRTSS now operates in place on EEREC_D; ELENG stores EEREC_D.
diff --git a/pcsx2/PsxDma.c b/pcsx2/PsxDma.c index caf6ac67b1..c5656dafc8 100644 --- a/pcsx2/PsxDma.c +++ b/pcsx2/PsxDma.c @@ -191,10 +191,12 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) { iopsifbusy[1] = 1; psHu32(0x1000F240) |= 0x4000; if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) { + FreezeXMMRegs(1); SIF1Dma(); psHu32(0x1000F240) &= ~0x40; psHu32(0x1000F240) &= ~0x100; psHu32(0x1000F240) &= ~0x4000; + FreezeXMMRegs(0); } } diff --git a/pcsx2/Sif.c b/pcsx2/Sif.c index 2717de4496..d758ca276f 100644 --- a/pcsx2/Sif.c +++ b/pcsx2/Sif.c @@ -368,13 +368,10 @@ _inline void SIF1Dma() notDone = 1; do { - - if(eesifbusy[1] == 1) // If EE SIF1 is enabled { - if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) { // STS == fromSIF1 - SysPrintf("SIF1 stall control\n"); - } + + if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) SysPrintf("SIF1 stall control\n"); // STS == fromSIF1 if(sif1dma->qwc == 0) // If there's no more to transfer { @@ -383,9 +380,7 @@ _inline void SIF1Dma() // Stop & signal interrupts on EE //sif1dma->chcr &= ~0x100; //hwDmacIrq(6); -#ifdef SIF_LOG SIF_LOG("EE SIF1 End %x\n", sif1.end); -#endif eesifbusy[1] = 0; notDone = 0; INT(6, cycles*BIAS); @@ -406,48 +401,38 @@ _inline void SIF1Dma() } sif1.chain = 1; - id = (ptag[0] >> 28) & 0x7; + id = (ptag[0] >> 28) & 0x7; switch(id) { case 0: // refe -#ifdef SIF_LOG SIF_LOG(" REFE %08X\n", ptag[1]); -#endif sif1.end = 1; sif1dma->madr = ptag[1]; sif1dma->tadr += 16; break; case 1: // cnt -#ifdef SIF_LOG SIF_LOG(" CNT\n"); -#endif sif1dma->madr = sif1dma->tadr + 16; sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4); break; case 2: // next -#ifdef SIF_LOG SIF_LOG(" NEXT %08X\n", ptag[1]); -#endif sif1dma->madr = sif1dma->tadr + 16; sif1dma->tadr = ptag[1]; break; case 3: // ref case 4: // refs -#ifdef SIF_LOG SIF_LOG(" REF %08X\n", ptag[1]); -#endif sif1dma->madr = ptag[1]; sif1dma->tadr += 16; break; case 7: // end -#ifdef SIF_LOG SIF_LOG(" END\n"); -#endif sif1.end = 1; sif1dma->madr = sif1dma->tadr + 16; sif1dma->tadr = 
sif1dma->madr + (sif1dma->qwc << 4); @@ -468,7 +453,6 @@ _inline void SIF1Dma() u32 *data; //notDone = 1; - _dmaGetAddr(sif1dma, data, sif1dma->madr, 6); if(qwTransfer > (FIFO_SIF1_W-sif1.fifoSize)/4) // Copy part of sif1dma into FIFO @@ -492,12 +476,9 @@ _inline void SIF1Dma() {*/ int readSize = size; - if(readSize > sif1.fifoSize) - readSize = sif1.fifoSize; + if(readSize > sif1.fifoSize) readSize = sif1.fifoSize; -#ifdef SIF_LOG SIF_LOG(" IOP SIF doing transfer %04X to %08X\n", readSize, HW_DMA10_MADR); -#endif SIF1read((u32*)PSXM(HW_DMA10_MADR), readSize); psxCpu->Clear(HW_DMA10_MADR, readSize); @@ -513,9 +494,7 @@ _inline void SIF1Dma() if(sif1.tagMode & 0x80) // Stop on tag IRQ { // Tag interrupt -#ifdef SIF_LOG SIF_LOG(" IOP SIF interrupt\n"); -#endif //HW_DMA10_CHCR &= ~0x01000000; //reset TR flag //psxDmaInterrupt2(3); iopsifbusy[1] = 0; @@ -527,9 +506,7 @@ _inline void SIF1Dma() else if(sif1.tagMode & 0x40) // Stop on tag END { // End tag. -#ifdef SIF_LOG SIF_LOG(" IOP SIF end\n"); -#endif //HW_DMA10_CHCR &= ~0x01000000; //reset TR flag //psxDmaInterrupt2(3); iopsifbusy[1] = 0; @@ -541,12 +518,8 @@ _inline void SIF1Dma() else if(sif1.fifoSize >= 4) // Read a tag { struct sifData d; - SIF1read((u32*)&d, 4); - -#ifdef SIF_LOG SIF_LOG(" IOP SIF dest chain tag madr:%08X wc:%04X id:%X irq:%d\n", d.data & 0xffffff, d.words, (d.data>>28)&7, (d.data>>31)&1); -#endif HW_DMA10_MADR = d.data & 0xffffff; sif1.counter = d.words; sif1.tagMode = (d.data >> 24) & 0xFF; @@ -554,8 +527,7 @@ _inline void SIF1Dma() } } } - }while(notDone); - + } while (notDone); } _inline void sif0Interrupt() { diff --git a/pcsx2/x86/iVUmicro.c b/pcsx2/x86/iVUmicro.c index e7a2bcb74d..1054f993f1 100644 --- a/pcsx2/x86/iVUmicro.c +++ b/pcsx2/x86/iVUmicro.c @@ -5228,22 +5228,25 @@ void recVUMI_WAITP(VURegs *VU, int info) } // in all EFU insts, EEREC_D is a temp reg -void vuSqSumXYZ(int regd, int regs, int regtemp) +void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z 
^ 2 { + //SysPrintf("VU: SUMXYZ\n"); if( cpucaps.hasStreamingSIMD4Extensions ) { SSE_MOVAPS_XMM_to_XMM(regd, regs); + if (CHECK_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0x8); SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71); } else { SSE_MOVAPS_XMM_to_XMM(regtemp, regs); + if (CHECK_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0x8); SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2 if( cpucaps.hasStreamingSIMD3Extensions ) { SSE3_HADDPS_XMM_to_XMM(regd, regtemp); - SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x + SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2 + SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z } else { SSE_MOVSS_XMM_to_XMM(regd, regtemp); @@ -5252,83 +5255,42 @@ void vuSqSumXYZ(int regd, int regs, int regtemp) SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2 //SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6); + + /* TODO: check if this code does the same thing as above code + SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + */ } } - - //SysPrintf("SUMXYZ\n"); } void recVUMI_ESADD( VURegs *VU, int info) { + //SysPrintf("VU: ESADD\n"); assert( VU == &VU1 ); - //SysPrintf("ESADD\n"); if( EEREC_TEMP == EEREC_D ) { // special code to reset P (don't know if this is still useful!) 
SysPrintf("ESADD: Resetting P reg!!!\n"); MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0); return; } - if( cpucaps.hasStreamingSIMD4Extensions ) - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71); - } - else - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x - } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - - vuFloat2(EEREC_TEMP, EEREC_D, 0x8); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); + if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); } +const static PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; + void recVUMI_ERSADD( VURegs *VU, int info ) { + //SysPrintf("VU: ERSADD\n"); assert( VU == &VU1 ); - // almost same as vuSqSumXYZ - - if( cpucaps.hasStreamingSIMD4Extensions ) - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71); - } - else - { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x - } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - 
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); - + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); // don't use RCPSS (very bad precision) + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); - vuFloat2(EEREC_TEMP, EEREC_D, 0x8); - + if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); } @@ -5338,9 +5300,8 @@ void recVUMI_ELENG( VURegs *VU, int info ) assert( VU == &VU1 ); vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); } void recVUMI_ERLENG( VURegs *VU, int info ) @@ -5349,9 +5310,8 @@ void recVUMI_ERLENG( VURegs *VU, int info ) assert( VU == &VU1 ); vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // sqrt(x^2 + y^2 + z^2) - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // d <- sqrt(x^2 + y^2 + z^2) - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // temp <- 1 + SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2) + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2) if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);