Fixed some bad VU1 opcodes (some opcodes didn't even return the correct register, so the correct result would end up in the wrong register). Also cleaned up some DMA code a little, and added FreezeXMMRegs() where it was needed.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@194 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-10-13 06:09:46 +00:00 committed by Gregory Hainaut
parent d69dc9a554
commit 2356f75396
3 changed files with 34 additions and 100 deletions

View File

@ -191,10 +191,12 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) {
iopsifbusy[1] = 1;
psHu32(0x1000F240) |= 0x4000;
if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) {
FreezeXMMRegs(1);
SIF1Dma();
psHu32(0x1000F240) &= ~0x40;
psHu32(0x1000F240) &= ~0x100;
psHu32(0x1000F240) &= ~0x4000;
FreezeXMMRegs(0);
}
}

View File

@ -368,13 +368,10 @@ _inline void SIF1Dma()
notDone = 1;
do
{
if(eesifbusy[1] == 1) // If EE SIF1 is enabled
{
if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) { // STS == fromSIF1
SysPrintf("SIF1 stall control\n");
}
if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) SysPrintf("SIF1 stall control\n"); // STS == fromSIF1
if(sif1dma->qwc == 0) // If there's no more to transfer
{
@ -383,9 +380,7 @@ _inline void SIF1Dma()
// Stop & signal interrupts on EE
//sif1dma->chcr &= ~0x100;
//hwDmacIrq(6);
#ifdef SIF_LOG
SIF_LOG("EE SIF1 End %x\n", sif1.end);
#endif
eesifbusy[1] = 0;
notDone = 0;
INT(6, cycles*BIAS);
@ -406,48 +401,38 @@ _inline void SIF1Dma()
}
sif1.chain = 1;
id = (ptag[0] >> 28) & 0x7;
id = (ptag[0] >> 28) & 0x7;
switch(id)
{
case 0: // refe
#ifdef SIF_LOG
SIF_LOG(" REFE %08X\n", ptag[1]);
#endif
sif1.end = 1;
sif1dma->madr = ptag[1];
sif1dma->tadr += 16;
break;
case 1: // cnt
#ifdef SIF_LOG
SIF_LOG(" CNT\n");
#endif
sif1dma->madr = sif1dma->tadr + 16;
sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4);
break;
case 2: // next
#ifdef SIF_LOG
SIF_LOG(" NEXT %08X\n", ptag[1]);
#endif
sif1dma->madr = sif1dma->tadr + 16;
sif1dma->tadr = ptag[1];
break;
case 3: // ref
case 4: // refs
#ifdef SIF_LOG
SIF_LOG(" REF %08X\n", ptag[1]);
#endif
sif1dma->madr = ptag[1];
sif1dma->tadr += 16;
break;
case 7: // end
#ifdef SIF_LOG
SIF_LOG(" END\n");
#endif
sif1.end = 1;
sif1dma->madr = sif1dma->tadr + 16;
sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4);
@ -468,7 +453,6 @@ _inline void SIF1Dma()
u32 *data;
//notDone = 1;
_dmaGetAddr(sif1dma, data, sif1dma->madr, 6);
if(qwTransfer > (FIFO_SIF1_W-sif1.fifoSize)/4) // Copy part of sif1dma into FIFO
@ -492,12 +476,9 @@ _inline void SIF1Dma()
{*/
int readSize = size;
if(readSize > sif1.fifoSize)
readSize = sif1.fifoSize;
if(readSize > sif1.fifoSize) readSize = sif1.fifoSize;
#ifdef SIF_LOG
SIF_LOG(" IOP SIF doing transfer %04X to %08X\n", readSize, HW_DMA10_MADR);
#endif
SIF1read((u32*)PSXM(HW_DMA10_MADR), readSize);
psxCpu->Clear(HW_DMA10_MADR, readSize);
@ -513,9 +494,7 @@ _inline void SIF1Dma()
if(sif1.tagMode & 0x80) // Stop on tag IRQ
{
// Tag interrupt
#ifdef SIF_LOG
SIF_LOG(" IOP SIF interrupt\n");
#endif
//HW_DMA10_CHCR &= ~0x01000000; //reset TR flag
//psxDmaInterrupt2(3);
iopsifbusy[1] = 0;
@ -527,9 +506,7 @@ _inline void SIF1Dma()
else if(sif1.tagMode & 0x40) // Stop on tag END
{
// End tag.
#ifdef SIF_LOG
SIF_LOG(" IOP SIF end\n");
#endif
//HW_DMA10_CHCR &= ~0x01000000; //reset TR flag
//psxDmaInterrupt2(3);
iopsifbusy[1] = 0;
@ -541,12 +518,8 @@ _inline void SIF1Dma()
else if(sif1.fifoSize >= 4) // Read a tag
{
struct sifData d;
SIF1read((u32*)&d, 4);
#ifdef SIF_LOG
SIF_LOG(" IOP SIF dest chain tag madr:%08X wc:%04X id:%X irq:%d\n", d.data & 0xffffff, d.words, (d.data>>28)&7, (d.data>>31)&1);
#endif
HW_DMA10_MADR = d.data & 0xffffff;
sif1.counter = d.words;
sif1.tagMode = (d.data >> 24) & 0xFF;
@ -554,8 +527,7 @@ _inline void SIF1Dma()
}
}
}
}while(notDone);
} while (notDone);
}
_inline void sif0Interrupt() {

View File

@ -5228,22 +5228,25 @@ void recVUMI_WAITP(VURegs *VU, int info)
}
// in all EFU insts, EEREC_D is a temp reg
void vuSqSumXYZ(int regd, int regs, int regtemp)
void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2
{
//SysPrintf("VU: SUMXYZ\n");
if( cpucaps.hasStreamingSIMD4Extensions )
{
SSE_MOVAPS_XMM_to_XMM(regd, regs);
if (CHECK_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0x8);
SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71);
}
else
{
SSE_MOVAPS_XMM_to_XMM(regtemp, regs);
if (CHECK_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0x8);
SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2
if( cpucaps.hasStreamingSIMD3Extensions ) {
SSE3_HADDPS_XMM_to_XMM(regd, regtemp);
SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z
SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x
SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2
SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z
}
else {
SSE_MOVSS_XMM_to_XMM(regd, regtemp);
@ -5252,83 +5255,42 @@ void vuSqSumXYZ(int regd, int regs, int regtemp)
SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz
SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2
//SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6);
/* TODO: check if this code does the same thing as above code
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
*/
}
}
//SysPrintf("SUMXYZ\n");
}
// ESADD: writes the sum of squares x^2 + y^2 + z^2 of EEREC_S to the P register
// (VU_VI_ADDR(REG_P, 0)).
//   VU   - must be &VU1 (asserted below).
//   info - not named directly in this body; presumably consumed by the EEREC_* macros (TODO confirm).
// NOTE(review): this body contains two back-to-back implementations that each end in a
// store to P: an inline DPPS / MULPS-based sequence, then a call to vuSqSumXYZ(). It reads
// like an old and a new revision shown together; confirm which one is meant to remain.
void recVUMI_ESADD( VURegs *VU, int info)
{
//SysPrintf("VU: ESADD\n");
assert( VU == &VU1 );
//SysPrintf("ESADD\n");
// When the temp and destination registers coincide, no sum is computed:
// a message is logged, 0 is written to P, and the function returns early.
if( EEREC_TEMP == EEREC_D ) { // special code to reset P (don't know if this is still useful!)
SysPrintf("ESADD: Resetting P reg!!!\n");
MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0);
return;
}
// --- First implementation: inline sum of squares, selected by CPU capability ---
if( cpucaps.hasStreamingSIMD4Extensions )
{
// Copy the source, then dot it with itself. DPPS mask 0x71: multiply lanes x, y, z
// and write the sum to lane x only (per the Intel DPPS immediate encoding).
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71);
}
else
{
// Square every lane of a copy of the source.
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
if( cpucaps.hasStreamingSIMD3Extensions ) {
// NOTE(review): this branch accumulates into EEREC_D, which has not been written
// earlier in this function - confirm its contents are intended as an input here.
SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
}
else {
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
// NOTE(review): the value stored here is EEREC_TEMP, but the two non-SSE4 branches above
// leave their running sum in EEREC_D. vuFloat2() is defined elsewhere; presumably it
// clamps the selected lane (TODO confirm).
vuFloat2(EEREC_TEMP, EEREC_D, 0x8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
// --- Second implementation: shared helper leaves the sum in EEREC_D, then P is stored again ---
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D);
}
// Four-lane constant wrapped in PCSX2_ALIGNED16. Lane 0 is 0x3f800000, the IEEE-754
// single-precision bit pattern of 1.0f; the other three lanes are all-ones.
static const PCSX2_ALIGNED16(u32 VU_ONE[4]) = {
	0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff
};
// ERSADD: writes 1 / (x^2 + y^2 + z^2) of EEREC_S to the P register
// (VU_VI_ADDR(REG_P, 0)), using a real divide rather than RCPSS.
//   VU   - must be &VU1 (asserted below); also read for VF[0].UL[3].
//   info - not named directly in this body; presumably consumed by the EEREC_* macros (TODO confirm).
// NOTE(review): the sum of squares is produced twice - once by the inline sequence and
// once by vuSqSumXYZ() - and the numerator 1.0 is loaded twice from two different places.
// It reads like an old and a new revision shown together; confirm which one is meant to remain.
void recVUMI_ERSADD( VURegs *VU, int info )
{
//SysPrintf("VU: ERSADD\n");
assert( VU == &VU1 );
// almost same as vuSqSumXYZ
// --- First implementation: inline sum of squares, selected by CPU capability ---
if( cpucaps.hasStreamingSIMD4Extensions )
{
// DPPS mask 0x71: multiply lanes x, y, z and write the sum to lane x only
// (per the Intel DPPS immediate encoding).
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71);
}
else
{
// Square every lane of a copy of the source.
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
if( cpucaps.hasStreamingSIMD3Extensions ) {
// NOTE(review): this branch accumulates into EEREC_D, which has not been written
// earlier in this function - confirm its contents are intended as an input here.
SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
}
else {
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
// Loads VF[0].UL[3] into EEREC_TEMP (presumably VF0.w == 1.0 - TODO confirm).
// NOTE(review): EEREC_TEMP is handed to vuSqSumXYZ() as its scratch register on the next
// line and reloaded from VU_ONE afterwards, so this load looks superseded.
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]);
// --- Second implementation: shared helper leaves the sum of squares in EEREC_D ---
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
// don't use RCPSS (very bad precision)
// temp = 1.0 (first lane of VU_ONE), then temp = temp / sum.
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE);
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
// NOTE(review): two post-divide adjustments are applied to EEREC_TEMP before the store:
// vuFloat2() (defined elsewhere; presumably a clamp - TODO confirm) and a MINSS against g_maxvals.
vuFloat2(EEREC_TEMP, EEREC_D, 0x8);
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}
@ -5338,9 +5300,8 @@ void recVUMI_ELENG( VURegs *VU, int info )
assert( VU == &VU1 );
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D);
}
void recVUMI_ERLENG( VURegs *VU, int info )
@ -5349,9 +5310,8 @@ void recVUMI_ERLENG( VURegs *VU, int info )
assert( VU == &VU1 );
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // sqrt(x^2 + y^2 + z^2)
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // d <- sqrt(x^2 + y^2 + z^2)
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // temp <- 1
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2)
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2)
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);