Reworked some DMA functions, which might fix DMA errors. GIFdma should also be faster, since it now freezes the MMX and XMM registers only when it needs to. Also did some very minor code cleanups.

P.S. GIFdma() is a mess; it needs to be cleaned up in the future.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@150 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-09-26 13:37:11 +00:00 committed by Gregory Hainaut
parent 7a70299529
commit 88ea5fb921
5 changed files with 97 additions and 145 deletions

View File

@ -797,14 +797,11 @@ void mechaDecryptBytes(unsigned char* buffer, int size)
int shiftAmount = (cdvd.decSet>>4) & 7; int shiftAmount = (cdvd.decSet>>4) & 7;
int doXor = (cdvd.decSet) & 1; int doXor = (cdvd.decSet) & 1;
int doShift = (cdvd.decSet) & 2; int doShift = (cdvd.decSet) & 2;
unsigned char key = cdvd.Key[4];
for(i=0; i<size; i++) for (i=0; i<size; i++)
{ {
if(doXor) if (doXor) buffer[i] ^= cdvd.Key[4];
buffer[i] ^= key; if (doShift) buffer[i] = (buffer[i]>>shiftAmount) | (buffer[i]<<(8-shiftAmount));
if(doShift)
buffer[i] = (buffer[i]>>shiftAmount) | (buffer[i]<<(8-shiftAmount));
} }
} }

View File

@ -1070,53 +1070,37 @@ int gscount = 0;
static int prevcycles = 0; static int prevcycles = 0;
static u32* prevtag = NULL; static u32* prevtag = NULL;
void GIFdma() { void GIFdma()
{
u32 *ptag; u32 *ptag;
gscycles= prevcycles ? prevcycles: gscycles;
u32 id; u32 id;
gscycles= prevcycles ? prevcycles: gscycles;
/*if ((psHu32(DMAC_CTRL) & 0xC0)) { if( (psHu32(GIF_CTRL) & 8) ) { // temporarily stop
SysPrintf("DMA Stall Control %x\n",(psHu32(DMAC_CTRL) & 0xC0));
}*/
if( (psHu32(GIF_CTRL) & 8) ) {
// temporarily stop
SysPrintf("Gif dma temp paused?\n"); SysPrintf("Gif dma temp paused?\n");
return; return;
} }
#ifdef GIF_LOG GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx\n", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1);
GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n"
" tadr = %lx, asr0 = %lx, asr1 = %lx\n",
gif->chcr, gif->madr, gif->qwc,
gif->tadr, gif->asr0, gif->asr1);
#endif
#ifndef GSPATH3FIX #ifndef GSPATH3FIX
if (psHu32(GIF_MODE) & 0x4) { if ( !(psHu32(GIF_MODE) & 0x4) ) {
} else if (vif1Regs->mskpath3 || psHu32(GIF_MODE) & 0x1) {
if (vif1Regs->mskpath3 || psHu32(GIF_MODE) & 0x1) { gif->chcr &= ~0x100;
gif->chcr &= ~0x100; psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0
psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0 hwDmacIrq(2);
hwDmacIrq(2); return;
return; }
} }
#endif #endif
FreezeXMMRegs(1);
FreezeMMXRegs(1);
if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80 && prevcycles != 0) { // STD == GIF if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80 && prevcycles != 0) { // STD == GIF
SysPrintf("GS Stall Control Source = %x, Drain = %x\n MADR = %x, STADR = %x", (psHu32(0xe000) >> 4) & 0x3, (psHu32(0xe000) >> 6) & 0x3,gif->madr, psHu32(DMAC_STADR)); SysPrintf("GS Stall Control Source = %x, Drain = %x\n MADR = %x, STADR = %x", (psHu32(0xe000) >> 4) & 0x3, (psHu32(0xe000) >> 6) & 0x3, gif->madr, psHu32(DMAC_STADR));
if( gif->madr + (gif->qwc * 16) > psHu32(DMAC_STADR) ) { if( gif->madr + (gif->qwc * 16) > psHu32(DMAC_STADR) ) {
INT(2, gscycles); INT(2, gscycles);
gscycles = 0; gscycles = 0;
FreezeXMMRegs(0);
FreezeMMXRegs(0);
return; return;
} }
prevcycles = 0; prevcycles = 0;
@ -1129,55 +1113,48 @@ void GIFdma() {
psHu32(GIF_STAT)|= 0x10000000; // FQC=31, hack ;) psHu32(GIF_STAT)|= 0x10000000; // FQC=31, hack ;)
#ifdef GSPATH3FIX #ifdef GSPATH3FIX
if (vif1Regs->mskpath3 || psHu32(GIF_MODE) & 0x1){ if (vif1Regs->mskpath3 || psHu32(GIF_MODE) & 0x1) {
if(gif->qwc == 0){ if(gif->qwc == 0) {
if((gif->chcr & 0x10e) == 0x104){ if((gif->chcr & 0x10e) == 0x104) {
ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR
if (ptag == NULL) { //Is ptag empty? if (ptag == NULL) { //Is ptag empty?
psHu32(DMAC_STAT)|= 1<<15; //If yes, set BEIS (BUSERR) in DMAC_STAT register psHu32(DMAC_STAT)|= 1<<15; //If yes, set BEIS (BUSERR) in DMAC_STAT register
FreezeXMMRegs(0); return;
FreezeMMXRegs(0); }
return; gscycles += 2;
} gif->chcr = ( gif->chcr & 0xFFFF ) | ( (*ptag) & 0xFFFF0000 ); //Transfer upper part of tag to CHCR bits 31-15
gscycles += 2; id = (ptag[0] >> 28) & 0x7; //ID for DmaChain copied from bit 28 of the tag
gif->chcr = ( gif->chcr & 0xFFFF ) | ( (*ptag) & 0xFFFF0000 ); //Transfer upper part of tag to CHCR bits 31-15 gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag
id = (ptag[0] >> 28) & 0x7; //ID for DmaChain copied from bit 28 of the tag gif->madr = ptag[1]; //MADR = ADDR field
gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag gspath3done = hwDmacSrcChainWithStack(gif, id);
gif->madr = ptag[1]; //MADR = ADDR field GIF_LOG("PTH3 MASK gifdmaChain %8.8x_%8.8x size=%d, id=%d, addr=%lx\n", ptag[1], ptag[0], gif->qwc, id, gif->madr);
gspath3done = hwDmacSrcChainWithStack(gif, id);
#ifdef GIF_LOG
GIF_LOG("PTH3 MASK gifdmaChain %8.8x_%8.8x size=%d, id=%d, addr=%lx\n",
ptag[1], ptag[0], gif->qwc, id, gif->madr);
#endif
if ((gif->chcr & 0x80) && ptag[0] >> 31) { //Check TIE bit of CHCR and IRQ bit of tag
#ifdef GIF_LOG
GIF_LOG("PATH3 MSK dmaIrq Set\n");
#endif
SysPrintf("GIF TIE\n");
gspath3done |= 1;
}
if ((gif->chcr & 0x80) && ptag[0] >> 31) { //Check TIE bit of CHCR and IRQ bit of tag
GIF_LOG("PATH3 MSK dmaIrq Set\n");
SysPrintf("GIF TIE\n");
gspath3done |= 1;
} }
} }
}
GIFchain(); FreezeXMMRegs(1);
FreezeMMXRegs(1);
GIFchain();
FreezeXMMRegs(0); // Theres a comment below that says not to unfreeze the xmm regs, so not sure about this.
FreezeMMXRegs(0);
if((gspath3done == 1 || (gif->chcr & 0xc) == 0) && gif->qwc == 0){ if((gspath3done == 1 || (gif->chcr & 0xc) == 0) && gif->qwc == 0){
if(gif->qwc > 0)SysPrintf("Horray\n"); if(gif->qwc > 0) SysPrintf("Horray\n");
gspath3done = 0; gspath3done = 0;
gif->chcr &= ~0x100; gif->chcr &= ~0x100;
//psHu32(GIF_MODE)&= ~0x4; //psHu32(GIF_MODE)&= ~0x4;
GSCSRr &= ~0xC000; GSCSRr &= ~0xC000;
GSCSRr |= 0x4000; GSCSRr |= 0x4000;
Path3transfer = 0; Path3transfer = 0;
psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0 psHu32(GIF_STAT)&= ~0x1F000E00; // OPH=0 | APATH=0 | QFC=0
psHu32(GIF_STAT)&= ~0x1F000000; // QFC=0
hwDmacIrq(DMAC_GIF); hwDmacIrq(DMAC_GIF);
} }
FreezeXMMRegs(0);
FreezeMMXRegs(0);
//Dont unfreeze xmm regs here, Masked PATH3 can only be called by VIF, which is already handling it. //Dont unfreeze xmm regs here, Masked PATH3 can only be called by VIF, which is already handling it.
return; return;
} }
@ -1189,28 +1166,27 @@ void GIFdma() {
if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80 && (gif->chcr & 0xc) == 0) { if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80 && (gif->chcr & 0xc) == 0) {
SysPrintf("DMA Stall Control on GIF normal\n"); SysPrintf("DMA Stall Control on GIF normal\n");
} }
GIFchain(); FreezeXMMRegs(1);
if(gif->qwc == 0 && (gif->chcr & 0xc) == 0)gspath3done = 1; FreezeMMXRegs(1);
GIFchain(); //Transfers the data set by the switch
FreezeXMMRegs(0);
FreezeMMXRegs(0);
if(gif->qwc == 0 && (gif->chcr & 0xc) == 0) gspath3done = 1;
} }
else { else {
// Chain Mode // Chain Mode
//#ifndef GSPATH3FIX
while (gspath3done == 0 && gif->qwc == 0) { //Loop if the transfers aren't intermittent while (gspath3done == 0 && gif->qwc == 0) { //Loop if the transfers aren't intermittent
//#endif
ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR
if (ptag == NULL) { //Is ptag empty? if (ptag == NULL) { //Is ptag empty?
psHu32(DMAC_STAT)|= 1<<15; //If yes, set BEIS (BUSERR) in DMAC_STAT register psHu32(DMAC_STAT)|= 1<<15; //If yes, set BEIS (BUSERR) in DMAC_STAT register
FreezeXMMRegs(0);
FreezeMMXRegs(0);
return; return;
} }
gscycles+=2; // Add 1 cycles from the QW read for the tag gscycles+=2; // Add 1 cycles from the QW read for the tag
// Transfer dma tag if tte is set // Transfer dma tag if tte is set
if (gif->chcr & 0x40) { if (gif->chcr & 0x40) {
//u32 temptag[4] = {0}; //u32 temptag[4] = {0};
#ifdef PCSX2_DEVBUILD
//SysPrintf("GIF TTE: %x_%x\n", ptag[3], ptag[2]); //SysPrintf("GIF TTE: %x_%x\n", ptag[3], ptag[2]);
#endif
//temptag[0] = ptag[2]; //temptag[0] = ptag[2];
//temptag[1] = ptag[3]; //temptag[1] = ptag[3];
@ -1223,13 +1199,8 @@ void GIFdma() {
gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag
gif->madr = ptag[1]; //MADR = ADDR field gif->madr = ptag[1]; //MADR = ADDR field
gspath3done = hwDmacSrcChainWithStack(gif, id); gspath3done = hwDmacSrcChainWithStack(gif, id);
#ifdef GIF_LOG GIF_LOG("gifdmaChain %8.8x_%8.8x size=%d, id=%d, addr=%lx\n", ptag[1], ptag[0], gif->qwc, id, gif->madr);
GIF_LOG("gifdmaChain %8.8x_%8.8x size=%d, id=%d, addr=%lx\n",
ptag[1], ptag[0], gif->qwc, id, gif->madr);
#endif
if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80) { // STD == GIF if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80) { // STD == GIF
// there are still bugs, need to also check if gif->madr +16*qwc >= stadr, if not, stall // there are still bugs, need to also check if gif->madr +16*qwc >= stadr, if not, stall
@ -1241,28 +1212,22 @@ void GIFdma() {
hwDmacIrq(13); hwDmacIrq(13);
INT(2, gscycles); INT(2, gscycles);
gscycles = 0; gscycles = 0;
FreezeXMMRegs(0);
FreezeMMXRegs(0);
return; return;
} }
} }
GIFchain(); //Transfers the data set by the switch FreezeXMMRegs(1);
FreezeMMXRegs(1);
GIFchain(); //Transfers the data set by the switch
FreezeXMMRegs(0);
FreezeMMXRegs(0);
if ((gif->chcr & 0x80) && ptag[0] >> 31) { //Check TIE bit of CHCR and IRQ bit of tag if ((gif->chcr & 0x80) && ptag[0] >> 31) { //Check TIE bit of CHCR and IRQ bit of tag
#ifdef GIF_LOG
GIF_LOG("dmaIrq Set\n"); GIF_LOG("dmaIrq Set\n");
#endif
//SysPrintf("GIF TIE\n");
// SysPrintf("GSdmaIrq Set\n");
gspath3done = 1; gspath3done = 1;
//gif->qwc = 0; //gif->qwc = 0;
//break;
} }
//#ifndef GSPATH3FIX
} }
//#endif
} }
prevtag = NULL; prevtag = NULL;
prevcycles = 0; prevcycles = 0;
@ -1270,17 +1235,13 @@ void GIFdma() {
INT(2, gscycles); INT(2, gscycles);
gscycles = 0; gscycles = 0;
} }
FreezeXMMRegs(0);
FreezeMMXRegs(0);
} }
void dmaGIF() { void dmaGIF() {
/*if(vif1Regs->mskpath3 || (psHu32(GIF_MODE) & 0x1)){ //if(vif1Regs->mskpath3 || (psHu32(GIF_MODE) & 0x1)){
INT(2, 48); //Wait time for the buffer to fill, fixes some timing problems in path 3 masking // INT(2, 48); //Wait time for the buffer to fill, fixes some timing problems in path 3 masking
} //It takes the time of 24 QW for the BUS to become ready - The Punisher, And1 Streetball //} //It takes the time of 24 QW for the BUS to become ready - The Punisher, And1 Streetball
else*/ //else
gspath3done = 0; // For some reason this doesnt clear? So when the system starts the thread, we will clear it :) gspath3done = 0; // For some reason this doesnt clear? So when the system starts the thread, we will clear it :)
if(gif->qwc > 0 && (gif->chcr & 0x4) == 0x4) if(gif->qwc > 0 && (gif->chcr & 0x4) == 0x4)
@ -1295,8 +1256,6 @@ void dmaGIF() {
GIFdma(); GIFdma();
} }
#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) #define spr0 ((DMACh*)&PS2MEM_HW[0xD000])
static unsigned int mfifocycles; static unsigned int mfifocycles;

View File

@ -317,24 +317,21 @@ extern __forceinline u8* dmaGetAddr(u32 mem)
u8* p, *pbase; u8* p, *pbase;
mem &= ~0xf; mem &= ~0xf;
#ifdef _DEBUG if( (mem&0xffff0000) == 0x50000000 ) {// reserved scratch pad mem
if( (mem & 0xffff0000) == 0x10000000 ) SysPrintf("dmaGetAddr: reserved scratch pad mem\n");
SysPrintf("dma to/from %x!\n", mem); return (u8*)&PS2MEM_SCRATCH[(mem) & 0x3ff0];
#endif }
if( mem == 0x50000000 ) // reserved scratch pad mem
return NULL;
p = (u8*)dmaGetAddrBase(mem); //, *pbase; p = (u8*)dmaGetAddrBase(mem);
#ifdef _WIN32 #ifdef _WIN32
// do manual LUT since IPU/SPR seems to use addrs 0x3000xxxx quite often // do manual LUT since IPU/SPR seems to use addrs 0x3000xxxx quite often
// linux doesn't suffer from this because it has better vm support // linux doesn't suffer from this because it has better vm support
#ifndef PCSX2_RELEASE
if( memLUT[ (p-PS2MEM_BASE)>>12 ].aPFNs == NULL ) { if( memLUT[ (p-PS2MEM_BASE)>>12 ].aPFNs == NULL ) {
SysPrintf("*PCSX2*: DMA error: %8.8x\n", mem); SysPrintf("dmaGetAddr: memLUT PFN warning\n");
return NULL; return p;
} }
#endif
pbase = (u8*)memLUT[ (p-PS2MEM_BASE)>>12 ].aVFNs[0]; pbase = (u8*)memLUT[ (p-PS2MEM_BASE)>>12 ].aVFNs[0];
if( pbase != NULL ) if( pbase != NULL )
p = pbase + ((u32)p&0xfff); p = pbase + ((u32)p&0xfff);

View File

@ -262,11 +262,12 @@ int _checkXMMreg(int type, int reg, int mode)
for (i=0; i<XMMREGS; i++) { for (i=0; i<XMMREGS; i++) {
if (xmmregs[i].inuse && xmmregs[i].type == (type&0xff) && xmmregs[i].reg == reg ) { if (xmmregs[i].inuse && xmmregs[i].type == (type&0xff) && xmmregs[i].reg == reg ) {
if( !(xmmregs[i].mode & MODE_READ) && (mode&(MODE_READ|MODE_READHALF)) ) { if ( !(xmmregs[i].mode & MODE_READ) ) {
if(mode&MODE_READ) if (mode & MODE_READ) {
SSEX_MOVDQA_M128_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)); SSEX_MOVDQA_M128_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
else { }
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[i]==XMMT_INT ) else if (mode & MODE_READHALF) {
if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[i] == XMMT_INT )
SSE2_MOVQ_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)); SSE2_MOVQ_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
else else
SSE_MOVLPS_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0)); SSE_MOVLPS_M64_to_XMM(i, (uptr)_XMMGetAddr(xmmregs[i].type, xmmregs[i].reg, xmmregs[i].VU ? &VU1 : &VU0));
@ -347,7 +348,7 @@ int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode) {
if (xmmregs[i].type != XMMTYPE_FPREG) continue; if (xmmregs[i].type != XMMTYPE_FPREG) continue;
if (xmmregs[i].reg != fpreg) continue; if (xmmregs[i].reg != fpreg) continue;
if( !(xmmregs[i].mode & MODE_READ) && (mode&MODE_READ)) { if( !(xmmregs[i].mode & MODE_READ) && (mode & MODE_READ)) {
SSE_MOVSS_M32_to_XMM(i, (uptr)&fpuRegs.fpr[fpreg].f); SSE_MOVSS_M32_to_XMM(i, (uptr)&fpuRegs.fpr[fpreg].f);
xmmregs[i].mode |= MODE_READ; xmmregs[i].mode |= MODE_READ;
} }
@ -791,28 +792,25 @@ void _deleteFPtoXMMreg(int reg, int flush)
{ {
int i; int i;
for (i=0; i<XMMREGS; i++) { for (i=0; i<XMMREGS; i++) {
if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_FPREG && xmmregs[i].reg == reg ) { if (xmmregs[i].inuse && xmmregs[i].type == XMMTYPE_FPREG && xmmregs[i].reg == reg ) {
switch(flush) { switch(flush) {
case 0: case 0:
_freeXMMreg(i); _freeXMMreg(i);
break; return;
case 1: case 1:
case 2: if (xmmregs[i].mode & MODE_WRITE) {
if( flush == 1 && (xmmregs[i].mode & MODE_WRITE) ) {
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[reg].UL, i); SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[reg].UL, i);
// get rid of MODE_WRITE since don't want to flush again // get rid of MODE_WRITE since don't want to flush again
xmmregs[i].mode &= ~MODE_WRITE; xmmregs[i].mode &= ~MODE_WRITE;
xmmregs[i].mode |= MODE_READ; xmmregs[i].mode |= MODE_READ;
} }
return;
if( flush == 2 ) case 2:
xmmregs[i].inuse = 0; xmmregs[i].inuse = 0;
break; return;
} }
return;
} }
} }
} }

View File

@ -1308,8 +1308,9 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xm
} }
if( xmminfo & XMMINFO_READS ) { if( xmminfo & XMMINFO_READS ) {
if( (!(xmminfo&XMMINFO_READT)||mmregt>=0) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) {
mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
}
else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ);
} }