Fixed some bad VU1 opcodes (some opcodes didn't even return the correct register, so the correct result would end up in the wrong reg, lol). Also cleaned up some DMA code a tiny bit, and added FreezeXMMRegs() where it was needed.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@194 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-10-13 06:09:46 +00:00 committed by Gregory Hainaut
parent d69dc9a554
commit 2356f75396
3 changed files with 34 additions and 100 deletions

View File

@ -191,10 +191,12 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) {
iopsifbusy[1] = 1; iopsifbusy[1] = 1;
psHu32(0x1000F240) |= 0x4000; psHu32(0x1000F240) |= 0x4000;
if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) { if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) {
FreezeXMMRegs(1);
SIF1Dma(); SIF1Dma();
psHu32(0x1000F240) &= ~0x40; psHu32(0x1000F240) &= ~0x40;
psHu32(0x1000F240) &= ~0x100; psHu32(0x1000F240) &= ~0x100;
psHu32(0x1000F240) &= ~0x4000; psHu32(0x1000F240) &= ~0x4000;
FreezeXMMRegs(0);
} }
} }

View File

@ -368,13 +368,10 @@ _inline void SIF1Dma()
notDone = 1; notDone = 1;
do do
{ {
if(eesifbusy[1] == 1) // If EE SIF1 is enabled if(eesifbusy[1] == 1) // If EE SIF1 is enabled
{ {
if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) { // STS == fromSIF1
SysPrintf("SIF1 stall control\n"); if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) SysPrintf("SIF1 stall control\n"); // STS == fromSIF1
}
if(sif1dma->qwc == 0) // If there's no more to transfer if(sif1dma->qwc == 0) // If there's no more to transfer
{ {
@ -383,9 +380,7 @@ _inline void SIF1Dma()
// Stop & signal interrupts on EE // Stop & signal interrupts on EE
//sif1dma->chcr &= ~0x100; //sif1dma->chcr &= ~0x100;
//hwDmacIrq(6); //hwDmacIrq(6);
#ifdef SIF_LOG
SIF_LOG("EE SIF1 End %x\n", sif1.end); SIF_LOG("EE SIF1 End %x\n", sif1.end);
#endif
eesifbusy[1] = 0; eesifbusy[1] = 0;
notDone = 0; notDone = 0;
INT(6, cycles*BIAS); INT(6, cycles*BIAS);
@ -406,48 +401,38 @@ _inline void SIF1Dma()
} }
sif1.chain = 1; sif1.chain = 1;
id = (ptag[0] >> 28) & 0x7; id = (ptag[0] >> 28) & 0x7;
switch(id) switch(id)
{ {
case 0: // refe case 0: // refe
#ifdef SIF_LOG
SIF_LOG(" REFE %08X\n", ptag[1]); SIF_LOG(" REFE %08X\n", ptag[1]);
#endif
sif1.end = 1; sif1.end = 1;
sif1dma->madr = ptag[1]; sif1dma->madr = ptag[1];
sif1dma->tadr += 16; sif1dma->tadr += 16;
break; break;
case 1: // cnt case 1: // cnt
#ifdef SIF_LOG
SIF_LOG(" CNT\n"); SIF_LOG(" CNT\n");
#endif
sif1dma->madr = sif1dma->tadr + 16; sif1dma->madr = sif1dma->tadr + 16;
sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4); sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4);
break; break;
case 2: // next case 2: // next
#ifdef SIF_LOG
SIF_LOG(" NEXT %08X\n", ptag[1]); SIF_LOG(" NEXT %08X\n", ptag[1]);
#endif
sif1dma->madr = sif1dma->tadr + 16; sif1dma->madr = sif1dma->tadr + 16;
sif1dma->tadr = ptag[1]; sif1dma->tadr = ptag[1];
break; break;
case 3: // ref case 3: // ref
case 4: // refs case 4: // refs
#ifdef SIF_LOG
SIF_LOG(" REF %08X\n", ptag[1]); SIF_LOG(" REF %08X\n", ptag[1]);
#endif
sif1dma->madr = ptag[1]; sif1dma->madr = ptag[1];
sif1dma->tadr += 16; sif1dma->tadr += 16;
break; break;
case 7: // end case 7: // end
#ifdef SIF_LOG
SIF_LOG(" END\n"); SIF_LOG(" END\n");
#endif
sif1.end = 1; sif1.end = 1;
sif1dma->madr = sif1dma->tadr + 16; sif1dma->madr = sif1dma->tadr + 16;
sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4); sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4);
@ -468,7 +453,6 @@ _inline void SIF1Dma()
u32 *data; u32 *data;
//notDone = 1; //notDone = 1;
_dmaGetAddr(sif1dma, data, sif1dma->madr, 6); _dmaGetAddr(sif1dma, data, sif1dma->madr, 6);
if(qwTransfer > (FIFO_SIF1_W-sif1.fifoSize)/4) // Copy part of sif1dma into FIFO if(qwTransfer > (FIFO_SIF1_W-sif1.fifoSize)/4) // Copy part of sif1dma into FIFO
@ -492,12 +476,9 @@ _inline void SIF1Dma()
{*/ {*/
int readSize = size; int readSize = size;
if(readSize > sif1.fifoSize) if(readSize > sif1.fifoSize) readSize = sif1.fifoSize;
readSize = sif1.fifoSize;
#ifdef SIF_LOG
SIF_LOG(" IOP SIF doing transfer %04X to %08X\n", readSize, HW_DMA10_MADR); SIF_LOG(" IOP SIF doing transfer %04X to %08X\n", readSize, HW_DMA10_MADR);
#endif
SIF1read((u32*)PSXM(HW_DMA10_MADR), readSize); SIF1read((u32*)PSXM(HW_DMA10_MADR), readSize);
psxCpu->Clear(HW_DMA10_MADR, readSize); psxCpu->Clear(HW_DMA10_MADR, readSize);
@ -513,9 +494,7 @@ _inline void SIF1Dma()
if(sif1.tagMode & 0x80) // Stop on tag IRQ if(sif1.tagMode & 0x80) // Stop on tag IRQ
{ {
// Tag interrupt // Tag interrupt
#ifdef SIF_LOG
SIF_LOG(" IOP SIF interrupt\n"); SIF_LOG(" IOP SIF interrupt\n");
#endif
//HW_DMA10_CHCR &= ~0x01000000; //reset TR flag //HW_DMA10_CHCR &= ~0x01000000; //reset TR flag
//psxDmaInterrupt2(3); //psxDmaInterrupt2(3);
iopsifbusy[1] = 0; iopsifbusy[1] = 0;
@ -527,9 +506,7 @@ _inline void SIF1Dma()
else if(sif1.tagMode & 0x40) // Stop on tag END else if(sif1.tagMode & 0x40) // Stop on tag END
{ {
// End tag. // End tag.
#ifdef SIF_LOG
SIF_LOG(" IOP SIF end\n"); SIF_LOG(" IOP SIF end\n");
#endif
//HW_DMA10_CHCR &= ~0x01000000; //reset TR flag //HW_DMA10_CHCR &= ~0x01000000; //reset TR flag
//psxDmaInterrupt2(3); //psxDmaInterrupt2(3);
iopsifbusy[1] = 0; iopsifbusy[1] = 0;
@ -541,12 +518,8 @@ _inline void SIF1Dma()
else if(sif1.fifoSize >= 4) // Read a tag else if(sif1.fifoSize >= 4) // Read a tag
{ {
struct sifData d; struct sifData d;
SIF1read((u32*)&d, 4); SIF1read((u32*)&d, 4);
#ifdef SIF_LOG
SIF_LOG(" IOP SIF dest chain tag madr:%08X wc:%04X id:%X irq:%d\n", d.data & 0xffffff, d.words, (d.data>>28)&7, (d.data>>31)&1); SIF_LOG(" IOP SIF dest chain tag madr:%08X wc:%04X id:%X irq:%d\n", d.data & 0xffffff, d.words, (d.data>>28)&7, (d.data>>31)&1);
#endif
HW_DMA10_MADR = d.data & 0xffffff; HW_DMA10_MADR = d.data & 0xffffff;
sif1.counter = d.words; sif1.counter = d.words;
sif1.tagMode = (d.data >> 24) & 0xFF; sif1.tagMode = (d.data >> 24) & 0xFF;
@ -554,8 +527,7 @@ _inline void SIF1Dma()
} }
} }
} }
}while(notDone); } while (notDone);
} }
_inline void sif0Interrupt() { _inline void sif0Interrupt() {

View File

@ -5228,22 +5228,25 @@ void recVUMI_WAITP(VURegs *VU, int info)
} }
// in all EFU insts, EEREC_D is a temp reg // in all EFU insts, EEREC_D is a temp reg
void vuSqSumXYZ(int regd, int regs, int regtemp) void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2
{ {
//SysPrintf("VU: SUMXYZ\n");
if( cpucaps.hasStreamingSIMD4Extensions ) if( cpucaps.hasStreamingSIMD4Extensions )
{ {
SSE_MOVAPS_XMM_to_XMM(regd, regs); SSE_MOVAPS_XMM_to_XMM(regd, regs);
if (CHECK_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0x8);
SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71); SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71);
} }
else else
{ {
SSE_MOVAPS_XMM_to_XMM(regtemp, regs); SSE_MOVAPS_XMM_to_XMM(regtemp, regs);
if (CHECK_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0x8);
SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2 SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2
if( cpucaps.hasStreamingSIMD3Extensions ) { if( cpucaps.hasStreamingSIMD3Extensions ) {
SSE3_HADDPS_XMM_to_XMM(regd, regtemp); SSE3_HADDPS_XMM_to_XMM(regd, regtemp);
SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2
SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z
} }
else { else {
SSE_MOVSS_XMM_to_XMM(regd, regtemp); SSE_MOVSS_XMM_to_XMM(regd, regtemp);
@ -5252,83 +5255,42 @@ void vuSqSumXYZ(int regd, int regs, int regtemp)
SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz
SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2 SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2
//SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6); //SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6);
/* TODO: check if this code does the same thing as above code
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
*/
} }
} }
//SysPrintf("SUMXYZ\n");
} }
void recVUMI_ESADD( VURegs *VU, int info) void recVUMI_ESADD( VURegs *VU, int info)
{ {
//SysPrintf("VU: ESADD\n");
assert( VU == &VU1 ); assert( VU == &VU1 );
//SysPrintf("ESADD\n");
if( EEREC_TEMP == EEREC_D ) { // special code to reset P (don't know if this is still useful!) if( EEREC_TEMP == EEREC_D ) { // special code to reset P (don't know if this is still useful!)
SysPrintf("ESADD: Resetting P reg!!!\n"); SysPrintf("ESADD: Resetting P reg!!!\n");
MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0); MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0);
return; return;
} }
if( cpucaps.hasStreamingSIMD4Extensions ) vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
{ if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D);
SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71);
}
else
{
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
if( cpucaps.hasStreamingSIMD3Extensions ) {
SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
}
else {
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
vuFloat2(EEREC_TEMP, EEREC_D, 0x8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
} }
const static PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff};
void recVUMI_ERSADD( VURegs *VU, int info ) void recVUMI_ERSADD( VURegs *VU, int info )
{ {
//SysPrintf("VU: ERSADD\n");
assert( VU == &VU1 ); assert( VU == &VU1 );
// almost same as vuSqSumXYZ vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
if( cpucaps.hasStreamingSIMD4Extensions )
{
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71);
}
else
{
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
if( cpucaps.hasStreamingSIMD3Extensions ) {
SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
}
else {
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]);
// don't use RCPSS (very bad precision) // don't use RCPSS (very bad precision)
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE);
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
vuFloat2(EEREC_TEMP, EEREC_D, 0x8); if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
} }
@ -5338,9 +5300,8 @@ void recVUMI_ELENG( VURegs *VU, int info )
assert( VU == &VU1 ); assert( VU == &VU1 );
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
} }
void recVUMI_ERLENG( VURegs *VU, int info ) void recVUMI_ERLENG( VURegs *VU, int info )
@ -5349,9 +5310,8 @@ void recVUMI_ERLENG( VURegs *VU, int info )
assert( VU == &VU1 ); assert( VU == &VU1 );
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // sqrt(x^2 + y^2 + z^2) SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2)
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // d <- sqrt(x^2 + y^2 + z^2) SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // temp <- 1
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2) SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2)
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);