IPU: Cleanups and simplifications, and removed a whole lot of code that was force-setting ipu0dma's STR to 0 and/or flushing the FIFO for no reason. Tested tons of games, couldn't find any regressions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3831 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-09-24 23:48:33 +00:00
parent 36d1503581
commit 9ebace0a8d
5 changed files with 183 additions and 233 deletions

View File

@ -26,17 +26,10 @@
#include "Vif_Dma.h" #include "Vif_Dma.h"
#include <limits.h> #include <limits.h>
static __fi void IPU_INT0_FROM()
{
if (ipu0dma.qwc > 0 && ipu0dma.chcr.STR) ipu0Interrupt();
}
tIPU_cmd ipu_cmd;
void ReorderBitstream();
// the BP doesn't advance and returns -1 if there is no data to be read // the BP doesn't advance and returns -1 if there is no data to be read
__aligned16 tIPU_cmd ipu_cmd;
__aligned16 tIPU_BP g_BP; __aligned16 tIPU_BP g_BP;
__aligned16 decoder_t decoder;
void IPUWorker(); void IPUWorker();
@ -53,11 +46,19 @@ int coded_block_pattern = 0;
u8 indx4[16*16/2]; u8 indx4[16*16/2];
__aligned16 decoder_t decoder;
void tIPU_cmd::clear()
{
memzero_sse_a(*this);
//memzero(*this);
current = 0xffffffff;
}
__fi void IPUProcessInterrupt() __fi void IPUProcessInterrupt()
{ {
if (ipuRegs.ctrl.BUSY && g_BP.IFC) IPUWorker(); if (ipuRegs.ctrl.BUSY) // && (g_BP.FP || g_BP.IFC || (ipu1dma.chcr.STR && ipu1dma.qwc > 0)))
IPUWorker();
} }
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
@ -194,7 +195,7 @@ __fi u32 ipuRead32(u32 mem)
pxAssert((mem & ~0xff) == 0x10002000); pxAssert((mem & ~0xff) == 0x10002000);
mem &= 0xff; // ipu repeats every 0x100 mem &= 0xff; // ipu repeats every 0x100
//IPUProcessInterrupt(); IPUProcessInterrupt();
switch (mem) switch (mem)
{ {
@ -236,7 +237,7 @@ __fi u64 ipuRead64(u32 mem)
pxAssert((mem & ~0xff) == 0x10002000); pxAssert((mem & ~0xff) == 0x10002000);
mem &= 0xff; // ipu repeats every 0x100 mem &= 0xff; // ipu repeats every 0x100
//IPUProcessInterrupt(); IPUProcessInterrupt();
switch (mem) switch (mem)
{ {
@ -286,13 +287,12 @@ __fi bool ipuWrite32(u32 mem, u32 value)
pxAssert((mem & ~0xfff) == 0x10002000); pxAssert((mem & ~0xfff) == 0x10002000);
mem &= 0xfff; mem &= 0xfff;
IPUProcessInterrupt();
switch (mem) switch (mem)
{ {
ipucase(IPU_CMD): // IPU_CMD ipucase(IPU_CMD): // IPU_CMD
IPU_LOG("write32: IPU_CMD=0x%08X", value); IPU_LOG("write32: IPU_CMD=0x%08X", value);
IPUCMD_WRITE(value); IPUCMD_WRITE(value);
IPUProcessInterrupt();
return false; return false;
ipucase(IPU_CTRL): // IPU_CTRL ipucase(IPU_CTRL): // IPU_CTRL
@ -323,13 +323,12 @@ __fi bool ipuWrite64(u32 mem, u64 value)
pxAssert((mem & ~0xfff) == 0x10002000); pxAssert((mem & ~0xfff) == 0x10002000);
mem &= 0xfff; mem &= 0xfff;
IPUProcessInterrupt();
switch (mem) switch (mem)
{ {
ipucase(IPU_CMD): ipucase(IPU_CMD):
IPU_LOG("write64: IPU_CMD=0x%08X", value); IPU_LOG("write64: IPU_CMD=0x%08X", value);
IPUCMD_WRITE((u32)value); IPUCMD_WRITE((u32)value);
IPUProcessInterrupt();
return false; return false;
} }
@ -352,72 +351,56 @@ static void ipuBCLR(u32 val)
IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP); IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP);
} }
static bool ipuIDEC(u32 val, bool resume) static __ri void ipuIDEC(tIPU_CMD_IDEC idec)
{ {
tIPU_CMD_IDEC idec(val); idec.log();
if (!resume)
{
idec.log();
g_BP.Advance(idec.FB);
//from IPU_CTRL //from IPU_CTRL
ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;) ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;)
decoder.coding_type = ipuRegs.ctrl.PCT; decoder.coding_type = ipuRegs.ctrl.PCT;
decoder.mpeg1 = ipuRegs.ctrl.MP1; decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST; decoder.q_scale_type = ipuRegs.ctrl.QST;
decoder.intra_vlc_format = ipuRegs.ctrl.IVF; decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS; decoder.scantype = ipuRegs.ctrl.AS;
decoder.intra_dc_precision = ipuRegs.ctrl.IDP; decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
//from IDEC value //from IDEC value
decoder.quantizer_scale = idec.QSC; decoder.quantizer_scale = idec.QSC;
decoder.frame_pred_frame_dct= !idec.DTD; decoder.frame_pred_frame_dct= !idec.DTD;
decoder.sgn = idec.SGN; decoder.sgn = idec.SGN;
decoder.dte = idec.DTE; decoder.dte = idec.DTE;
decoder.ofm = idec.OFM; decoder.ofm = idec.OFM;
//other stuff //other stuff
decoder.dcr = 1; // resets DC prediction value decoder.dcr = 1; // resets DC prediction value
}
return mpeg2sliceIDEC();
} }
static int s_bdec = 0; static int s_bdec = 0;
static __fi bool ipuBDEC(u32 val, bool resume) static __ri void ipuBDEC(tIPU_CMD_BDEC bdec)
{ {
tIPU_CMD_BDEC bdec(val); bdec.log(s_bdec);
if (IsDebugBuild) s_bdec++;
if (!resume) decoder.coding_type = I_TYPE;
{ decoder.mpeg1 = ipuRegs.ctrl.MP1;
bdec.log(s_bdec); decoder.q_scale_type = ipuRegs.ctrl.QST;
if (IsDebugBuild) s_bdec++; decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS;
g_BP.Advance(bdec.FB); decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
decoder.coding_type = I_TYPE;
decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST;
decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS;
decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
//from BDEC value //from BDEC value
decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1; decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0; decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
decoder.dcr = bdec.DCR; decoder.dcr = bdec.DCR;
decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN; decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
memzero_sse_a(decoder.mb8); memzero_sse_a(decoder.mb8);
memzero_sse_a(decoder.mb16); memzero_sse_a(decoder.mb16);
}
return mpeg2_slice();
} }
static bool __fastcall ipuVDEC(u32 val) static __fi bool ipuVDEC(u32 val)
{ {
switch (ipu_cmd.pos[0]) switch (ipu_cmd.pos[0])
{ {
@ -483,7 +466,7 @@ static bool __fastcall ipuVDEC(u32 val)
return false; return false;
} }
static __fi bool ipuFDEC(u32 val) static __ri bool ipuFDEC(u32 val)
{ {
if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false; if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false;
@ -839,206 +822,154 @@ u8 getBits8(u8 *address, bool advance)
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
// IPU Worker / Dispatcher // IPU Worker / Dispatcher
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
void IPUCMD_WRITE(u32 val)
// When a command is written, we set some various busy flags and clear some other junk.
// The actual decoding will be handled by IPUworker.
__fi void IPUCMD_WRITE(u32 val)
{ {
// don't process anything if currently busy // don't process anything if currently busy
if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread //if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread
ipuRegs.ctrl.ECD = 0; ipuRegs.ctrl.ECD = 0;
ipuRegs.ctrl.SCD = 0; //clear ECD/SCD ipuRegs.ctrl.SCD = 0;
ipu_cmd.clear(); ipu_cmd.clear();
ipu_cmd.current = val; ipu_cmd.current = val;
switch (val >> 28) switch (ipu_cmd.CMD)
{ {
// BCLR and SETTH require no data so they always execute inline:
case SCE_IPU_BCLR: case SCE_IPU_BCLR:
ipuBCLR(val); ipuBCLR(val);
hwIntcIrq(INTC_IPU); //DMAC_TO_IPU hwIntcIrq(INTC_IPU); //DMAC_TO_IPU
ipuRegs.ctrl.BUSY = 0;
return; return;
case SCE_IPU_VDEC:
g_BP.Advance(val & 0x3F);
// check if enough data in queue
if (ipuVDEC(val)) return;
ipuRegs.cmd.BUSY = 0x80000000;
ipuRegs.topbusy = 0x80000000;
break;
case SCE_IPU_FDEC:
IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x",
val & 0x3f, g_BP.IFC, (int)g_BP.BP, ipu1dma.chcr._u32);
g_BP.Advance(val & 0x3F);
if (ipuFDEC(val)) return;
ipuRegs.cmd.BUSY = 0x80000000;
ipuRegs.topbusy = 0x80000000;
break;
case SCE_IPU_SETTH: case SCE_IPU_SETTH:
ipuSETTH(val); ipuSETTH(val);
hwIntcIrq(INTC_IPU); hwIntcIrq(INTC_IPU);
ipuRegs.ctrl.BUSY = 0;
return; return;
case SCE_IPU_IDEC:
g_BP.Advance(val & 0x3F);
ipuIDEC(val);
ipuRegs.SetTopBusy();
break;
case SCE_IPU_BDEC:
g_BP.Advance(val & 0x3F);
ipuBDEC(val);
ipuRegs.SetTopBusy();
break;
case SCE_IPU_VDEC:
g_BP.Advance(val & 0x3F);
ipuRegs.SetDataBusy();
break;
case SCE_IPU_FDEC:
IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x",
val & 0x3f, g_BP.IFC, g_BP.BP, ipu1dma.chcr._u32);
g_BP.Advance(val & 0x3F);
ipuRegs.SetDataBusy();
break;
case SCE_IPU_SETIQ: case SCE_IPU_SETIQ:
IPU_LOG("SETIQ command."); IPU_LOG("SETIQ command.");
if (val & 0x3f) IPU_LOG("Skip %d bits.", val & 0x3f);
g_BP.Advance(val & 0x3F); g_BP.Advance(val & 0x3F);
if (ipuSETIQ(val)) return;
break; break;
case SCE_IPU_SETVQ: case SCE_IPU_SETVQ:
if (ipuSETVQ(val)) return;
break; break;
case SCE_IPU_CSC: case SCE_IPU_CSC:
ipu_cmd.pos[1] = 0; ipu_cmd.pos[1] = 0;
ipu_cmd.index = 0; ipu_cmd.index = 0;
if (ipuCSC(val))
{
IPU_INT0_FROM();
return;
}
break; break;
case SCE_IPU_PACK: case SCE_IPU_PACK:
ipu_cmd.pos[1] = 0; ipu_cmd.pos[1] = 0;
ipu_cmd.index = 0; ipu_cmd.index = 0;
if (ipuPACK(val)) return;
break; break;
case SCE_IPU_IDEC: jNO_DEFAULT;
if (ipuIDEC(val, false))
{
// idec done, ipu0 done too
IPU_INT0_FROM();
return;
}
ipuRegs.topbusy = 0x80000000;
break;
case SCE_IPU_BDEC:
if (ipuBDEC(val, false))
{
IPU_INT0_FROM();
if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
}
else
{
ipuRegs.topbusy = 0x80000000;
}
break;
} }
// have to resort to the thread
ipuRegs.ctrl.BUSY = 1; ipuRegs.ctrl.BUSY = 1;
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
//if(!ipu1dma.chcr.STR) hwIntcIrq(INTC_IPU);
} }
void IPUWorker() __noinline void IPUWorker()
{ {
pxAssert(ipuRegs.ctrl.BUSY); pxAssert(ipuRegs.ctrl.BUSY);
switch (ipu_cmd.CMD) switch (ipu_cmd.CMD)
{ {
case SCE_IPU_VDEC: // These are unreachable (BUSY will always be 0 for them)
if (!ipuVDEC(ipu_cmd.current)) //case SCE_IPU_BCLR:
{ //case SCE_IPU_SETTH:
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); //break;
return;
}
ipuRegs.cmd.BUSY = 0;
ipuRegs.topbusy = 0;
break;
case SCE_IPU_FDEC:
if (!ipuFDEC(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
ipuRegs.cmd.BUSY = 0;
ipuRegs.topbusy = 0;
break;
case SCE_IPU_SETIQ:
if (!ipuSETIQ(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
break;
case SCE_IPU_SETVQ:
if (!ipuSETVQ(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
break;
case SCE_IPU_CSC:
if (!ipuCSC(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
IPU_INT0_FROM();
break;
case SCE_IPU_PACK:
if (!ipuPACK(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
break;
case SCE_IPU_IDEC: case SCE_IPU_IDEC:
if (!ipuIDEC(ipu_cmd.current, true)) if (!mpeg2sliceIDEC()) return;
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
ipuRegs.ctrl.OFC = 0; //ipuRegs.ctrl.OFC = 0;
ipuRegs.ctrl.BUSY = 0;
ipuRegs.topbusy = 0; ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0; ipuRegs.cmd.BUSY = 0;
ipu_cmd.current = 0xffffffff;
// CHECK!: IPU0dma remains when IDEC is done, so we need to clear it // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
IPU_INT0_FROM(); //IPU_INT0_FROM();
break; break;
case SCE_IPU_BDEC: case SCE_IPU_BDEC:
if (!ipuBDEC(ipu_cmd.current, true)) if (!mpeg2_slice()) return;
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
ipuRegs.ctrl.BUSY = 0;
ipuRegs.topbusy = 0; ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0; ipuRegs.cmd.BUSY = 0;
ipu_cmd.current = 0xffffffff;
IPU_INT0_FROM(); //if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
default:
Console.WriteLn("Unknown IPU command: %08x", ipu_cmd.current);
break; break;
case SCE_IPU_VDEC:
if (!ipuVDEC(ipu_cmd.current)) return;
ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0;
break;
case SCE_IPU_FDEC:
if (!ipuFDEC(ipu_cmd.current)) return;
ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0;
break;
case SCE_IPU_SETIQ:
if (!ipuSETIQ(ipu_cmd.current)) return;
break;
case SCE_IPU_SETVQ:
if (!ipuSETVQ(ipu_cmd.current)) return;
break;
case SCE_IPU_CSC:
if (!ipuCSC(ipu_cmd.current)) return;
break;
case SCE_IPU_PACK:
if (!ipuPACK(ipu_cmd.current)) return;
break;
jNO_DEFAULT
} }
// success // success
ipuRegs.ctrl.BUSY = 0; ipuRegs.ctrl.BUSY = 0;
ipu_cmd.current = 0xffffffff; ipu_cmd.current = 0xffffffff;
hwIntcIrq(INTC_IPU);
} }

View File

@ -33,6 +33,11 @@ struct tIPU_CMD
{ {
u32 DATA; u32 DATA;
u32 BUSY; u32 BUSY;
void SetBusy(bool busy=true)
{
BUSY = busy ? 0x80000000 : 0;
}
}; };
union tIPU_CTRL { union tIPU_CTRL {
@ -223,44 +228,60 @@ enum SCE_IPU
}; };
struct IPUregisters { struct IPUregisters {
tIPU_CMD cmd; tIPU_CMD cmd;
u32 dummy0[2]; u32 dummy0[2];
tIPU_CTRL ctrl;
u32 dummy1[3]; tIPU_CTRL ctrl;
u32 ipubp; u32 dummy1[3];
u32 dummy2[3];
u32 top; u32 ipubp;
u32 topbusy; u32 dummy2[3];
u32 dummy3[2];
u32 top;
u32 topbusy;
u32 dummy3[2];
void SetTopBusy()
{
topbusy = 0x80000000;
}
void SetDataBusy()
{
cmd.BUSY = 0x80000000;
topbusy = 0x80000000;
}
}; };
struct tIPU_cmd union tIPU_cmd
{ {
int index; struct
int pos[6];
union {
struct {
u32 OPTION : 28;
u32 CMD : 4;
};
u32 current;
};
void clear()
{ {
memzero(pos); int index;
index = 0; int pos[6];
current = 0xffffffff; union {
} struct {
u32 OPTION : 28;
u32 CMD : 4;
};
u32 current;
};
};
u128 _u128[2];
void clear();
wxString desc() const wxString desc() const
{ {
return wxsFormat(L"Ipu cmd: index = 0x%x, current = 0x%x, pos[0] = 0x%x, pos[1] = 0x%x", return pxsFmt(L"Ipu cmd: index = 0x%x, current = 0x%x, pos[0] = 0x%x, pos[1] = 0x%x",
index, current, pos[0], pos[1]); index, current, pos[0], pos[1]);
} }
}; };
static IPUregisters& ipuRegs = (IPUregisters&)eeHw[0x2000]; static IPUregisters& ipuRegs = (IPUregisters&)eeHw[0x2000];
extern tIPU_cmd ipu_cmd; extern __aligned16 tIPU_cmd ipu_cmd;
extern int coded_block_pattern; extern int coded_block_pattern;
extern int ipuInit(); extern int ipuInit();

View File

@ -381,9 +381,7 @@ __fi void dmaIPU0() // fromIPU
hwDmacIrq(DMAC_FROM_IPU); hwDmacIrq(DMAC_FROM_IPU);
} }
//IPUProcessInterrupt(); IPUProcessInterrupt();
extern void IPUWorker();
if (ipuRegs.ctrl.BUSY) IPUWorker();
} }
__fi void dmaIPU1() // toIPU __fi void dmaIPU1() // toIPU

View File

@ -688,7 +688,7 @@ void __fi finishmpeg2sliceIDEC()
coded_block_pattern = decoder.coded_block_pattern; coded_block_pattern = decoder.coded_block_pattern;
} }
bool mpeg2sliceIDEC() __fi bool mpeg2sliceIDEC()
{ {
u16 code; u16 code;
@ -922,7 +922,7 @@ finish_idec:
return true; return true;
} }
bool mpeg2_slice() __fi bool mpeg2_slice()
{ {
int DCT_offset, DCT_stride; int DCT_offset, DCT_stride;

View File

@ -37,7 +37,7 @@ __noinline void memzero_sse_a( T& dest )
float (*destxmm)[4] = (float(*)[4])&dest; float (*destxmm)[4] = (float(*)[4])&dest;
#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[idx][0], zeroreg) #define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[idx-1][0], zeroreg)
switch( MZFqwc & 0x07 ) switch( MZFqwc & 0x07 )
{ {