mirror of https://github.com/PCSX2/pcsx2.git
Fixed some bad VU1 opcodes (some opcodes didn't even return the correct register, so the correct result would end up in the wrong reg, lol). Also cleaned up some DMA code a tiny bit, and added FreezeXMMRegs() where it was needed.
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@194 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
d69dc9a554
commit
2356f75396
|
@ -191,10 +191,12 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) {
|
|||
iopsifbusy[1] = 1;
|
||||
psHu32(0x1000F240) |= 0x4000;
|
||||
if (eesifbusy[1] == 1 && iopsifbusy[1] == 1) {
|
||||
FreezeXMMRegs(1);
|
||||
SIF1Dma();
|
||||
psHu32(0x1000F240) &= ~0x40;
|
||||
psHu32(0x1000F240) &= ~0x100;
|
||||
psHu32(0x1000F240) &= ~0x4000;
|
||||
FreezeXMMRegs(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
38
pcsx2/Sif.c
38
pcsx2/Sif.c
|
@ -368,13 +368,10 @@ _inline void SIF1Dma()
|
|||
notDone = 1;
|
||||
do
|
||||
{
|
||||
|
||||
|
||||
if(eesifbusy[1] == 1) // If EE SIF1 is enabled
|
||||
{
|
||||
if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) { // STS == fromSIF1
|
||||
SysPrintf("SIF1 stall control\n");
|
||||
}
|
||||
|
||||
if ((psHu32(DMAC_CTRL) & 0xC0) == 0xC0) SysPrintf("SIF1 stall control\n"); // STS == fromSIF1
|
||||
|
||||
if(sif1dma->qwc == 0) // If there's no more to transfer
|
||||
{
|
||||
|
@ -383,9 +380,7 @@ _inline void SIF1Dma()
|
|||
// Stop & signal interrupts on EE
|
||||
//sif1dma->chcr &= ~0x100;
|
||||
//hwDmacIrq(6);
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG("EE SIF1 End %x\n", sif1.end);
|
||||
#endif
|
||||
eesifbusy[1] = 0;
|
||||
notDone = 0;
|
||||
INT(6, cycles*BIAS);
|
||||
|
@ -406,48 +401,38 @@ _inline void SIF1Dma()
|
|||
}
|
||||
|
||||
sif1.chain = 1;
|
||||
id = (ptag[0] >> 28) & 0x7;
|
||||
id = (ptag[0] >> 28) & 0x7;
|
||||
|
||||
switch(id)
|
||||
{
|
||||
case 0: // refe
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" REFE %08X\n", ptag[1]);
|
||||
#endif
|
||||
sif1.end = 1;
|
||||
sif1dma->madr = ptag[1];
|
||||
sif1dma->tadr += 16;
|
||||
break;
|
||||
|
||||
case 1: // cnt
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" CNT\n");
|
||||
#endif
|
||||
sif1dma->madr = sif1dma->tadr + 16;
|
||||
sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4);
|
||||
break;
|
||||
|
||||
case 2: // next
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" NEXT %08X\n", ptag[1]);
|
||||
#endif
|
||||
sif1dma->madr = sif1dma->tadr + 16;
|
||||
sif1dma->tadr = ptag[1];
|
||||
break;
|
||||
|
||||
case 3: // ref
|
||||
case 4: // refs
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" REF %08X\n", ptag[1]);
|
||||
#endif
|
||||
sif1dma->madr = ptag[1];
|
||||
sif1dma->tadr += 16;
|
||||
break;
|
||||
|
||||
case 7: // end
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" END\n");
|
||||
#endif
|
||||
sif1.end = 1;
|
||||
sif1dma->madr = sif1dma->tadr + 16;
|
||||
sif1dma->tadr = sif1dma->madr + (sif1dma->qwc << 4);
|
||||
|
@ -468,7 +453,6 @@ _inline void SIF1Dma()
|
|||
u32 *data;
|
||||
|
||||
//notDone = 1;
|
||||
|
||||
_dmaGetAddr(sif1dma, data, sif1dma->madr, 6);
|
||||
|
||||
if(qwTransfer > (FIFO_SIF1_W-sif1.fifoSize)/4) // Copy part of sif1dma into FIFO
|
||||
|
@ -492,12 +476,9 @@ _inline void SIF1Dma()
|
|||
{*/
|
||||
int readSize = size;
|
||||
|
||||
if(readSize > sif1.fifoSize)
|
||||
readSize = sif1.fifoSize;
|
||||
if(readSize > sif1.fifoSize) readSize = sif1.fifoSize;
|
||||
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" IOP SIF doing transfer %04X to %08X\n", readSize, HW_DMA10_MADR);
|
||||
#endif
|
||||
|
||||
SIF1read((u32*)PSXM(HW_DMA10_MADR), readSize);
|
||||
psxCpu->Clear(HW_DMA10_MADR, readSize);
|
||||
|
@ -513,9 +494,7 @@ _inline void SIF1Dma()
|
|||
if(sif1.tagMode & 0x80) // Stop on tag IRQ
|
||||
{
|
||||
// Tag interrupt
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" IOP SIF interrupt\n");
|
||||
#endif
|
||||
//HW_DMA10_CHCR &= ~0x01000000; //reset TR flag
|
||||
//psxDmaInterrupt2(3);
|
||||
iopsifbusy[1] = 0;
|
||||
|
@ -527,9 +506,7 @@ _inline void SIF1Dma()
|
|||
else if(sif1.tagMode & 0x40) // Stop on tag END
|
||||
{
|
||||
// End tag.
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" IOP SIF end\n");
|
||||
#endif
|
||||
//HW_DMA10_CHCR &= ~0x01000000; //reset TR flag
|
||||
//psxDmaInterrupt2(3);
|
||||
iopsifbusy[1] = 0;
|
||||
|
@ -541,12 +518,8 @@ _inline void SIF1Dma()
|
|||
else if(sif1.fifoSize >= 4) // Read a tag
|
||||
{
|
||||
struct sifData d;
|
||||
|
||||
SIF1read((u32*)&d, 4);
|
||||
|
||||
#ifdef SIF_LOG
|
||||
SIF_LOG(" IOP SIF dest chain tag madr:%08X wc:%04X id:%X irq:%d\n", d.data & 0xffffff, d.words, (d.data>>28)&7, (d.data>>31)&1);
|
||||
#endif
|
||||
HW_DMA10_MADR = d.data & 0xffffff;
|
||||
sif1.counter = d.words;
|
||||
sif1.tagMode = (d.data >> 24) & 0xFF;
|
||||
|
@ -554,8 +527,7 @@ _inline void SIF1Dma()
|
|||
}
|
||||
}
|
||||
}
|
||||
}while(notDone);
|
||||
|
||||
} while (notDone);
|
||||
}
|
||||
|
||||
_inline void sif0Interrupt() {
|
||||
|
|
|
@ -5228,22 +5228,25 @@ void recVUMI_WAITP(VURegs *VU, int info)
|
|||
}
|
||||
|
||||
// in all EFU insts, EEREC_D is a temp reg
|
||||
void vuSqSumXYZ(int regd, int regs, int regtemp)
|
||||
void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2
|
||||
{
|
||||
//SysPrintf("VU: SUMXYZ\n");
|
||||
if( cpucaps.hasStreamingSIMD4Extensions )
|
||||
{
|
||||
SSE_MOVAPS_XMM_to_XMM(regd, regs);
|
||||
if (CHECK_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0x8);
|
||||
SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71);
|
||||
}
|
||||
else
|
||||
{
|
||||
SSE_MOVAPS_XMM_to_XMM(regtemp, regs);
|
||||
if (CHECK_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0x8);
|
||||
SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2
|
||||
|
||||
if( cpucaps.hasStreamingSIMD3Extensions ) {
|
||||
SSE3_HADDPS_XMM_to_XMM(regd, regtemp);
|
||||
SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z
|
||||
SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x
|
||||
SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2
|
||||
SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z
|
||||
}
|
||||
else {
|
||||
SSE_MOVSS_XMM_to_XMM(regd, regtemp);
|
||||
|
@ -5252,83 +5255,42 @@ void vuSqSumXYZ(int regd, int regs, int regtemp)
|
|||
SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz
|
||||
SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2
|
||||
//SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6);
|
||||
|
||||
/* TODO: check if this code does the same thing as above code
|
||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
|
||||
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
//SysPrintf("SUMXYZ\n");
|
||||
}
|
||||
|
||||
void recVUMI_ESADD( VURegs *VU, int info)
|
||||
{
|
||||
//SysPrintf("VU: ESADD\n");
|
||||
assert( VU == &VU1 );
|
||||
//SysPrintf("ESADD\n");
|
||||
if( EEREC_TEMP == EEREC_D ) { // special code to reset P (don't know if this is still useful!)
|
||||
SysPrintf("ESADD: Resetting P reg!!!\n");
|
||||
MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0);
|
||||
return;
|
||||
}
|
||||
if( cpucaps.hasStreamingSIMD4Extensions )
|
||||
{
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||
SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71);
|
||||
}
|
||||
else
|
||||
{
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||
|
||||
if( cpucaps.hasStreamingSIMD3Extensions ) {
|
||||
SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
|
||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
|
||||
}
|
||||
else {
|
||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
|
||||
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
}
|
||||
}
|
||||
|
||||
vuFloat2(EEREC_TEMP, EEREC_D, 0x8);
|
||||
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
|
||||
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
|
||||
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D);
|
||||
}
|
||||
|
||||
const static PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff};
|
||||
|
||||
void recVUMI_ERSADD( VURegs *VU, int info )
|
||||
{
|
||||
//SysPrintf("VU: ERSADD\n");
|
||||
assert( VU == &VU1 );
|
||||
// almost same as vuSqSumXYZ
|
||||
|
||||
if( cpucaps.hasStreamingSIMD4Extensions )
|
||||
{
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||
SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71);
|
||||
}
|
||||
else
|
||||
{
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||
|
||||
if( cpucaps.hasStreamingSIMD3Extensions ) {
|
||||
SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
|
||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
|
||||
}
|
||||
else {
|
||||
SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
|
||||
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
||||
}
|
||||
}
|
||||
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]);
|
||||
|
||||
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
|
||||
// don't use RCPSS (very bad precision)
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE);
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
|
||||
vuFloat2(EEREC_TEMP, EEREC_D, 0x8);
|
||||
|
||||
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
|
||||
}
|
||||
|
||||
|
@ -5338,9 +5300,8 @@ void recVUMI_ELENG( VURegs *VU, int info )
|
|||
assert( VU == &VU1 );
|
||||
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
|
||||
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
|
||||
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
|
||||
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
|
||||
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D);
|
||||
}
|
||||
|
||||
void recVUMI_ERLENG( VURegs *VU, int info )
|
||||
|
@ -5349,9 +5310,8 @@ void recVUMI_ERLENG( VURegs *VU, int info )
|
|||
assert( VU == &VU1 );
|
||||
vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
|
||||
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive
|
||||
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // sqrt(x^2 + y^2 + z^2)
|
||||
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // d <- sqrt(x^2 + y^2 + z^2)
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // temp <- 1
|
||||
SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2)
|
||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1
|
||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2)
|
||||
if (CHECK_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
|
||||
|
|
Loading…
Reference in New Issue