IPU: Cleanups and simplifications, and removed a whole lot of code that was force-setting ipu0dma's STR to 0 and/or flushing the FIFO for no reason. Tested tons of games, couldn't find any regressions.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3831 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-09-24 23:48:33 +00:00
parent 36d1503581
commit 9ebace0a8d
5 changed files with 183 additions and 233 deletions

View File

@ -26,17 +26,10 @@
#include "Vif_Dma.h"
#include <limits.h>
static __fi void IPU_INT0_FROM()
{
if (ipu0dma.qwc > 0 && ipu0dma.chcr.STR) ipu0Interrupt();
}
tIPU_cmd ipu_cmd;
void ReorderBitstream();
// the BP doesn't advance and returns -1 if there is no data to be read
__aligned16 tIPU_cmd ipu_cmd;
__aligned16 tIPU_BP g_BP;
__aligned16 decoder_t decoder;
void IPUWorker();
@ -53,11 +46,19 @@ int coded_block_pattern = 0;
u8 indx4[16*16/2];
__aligned16 decoder_t decoder;
void tIPU_cmd::clear()
{
memzero_sse_a(*this);
//memzero(*this);
current = 0xffffffff;
}
__fi void IPUProcessInterrupt()
{
if (ipuRegs.ctrl.BUSY && g_BP.IFC) IPUWorker();
if (ipuRegs.ctrl.BUSY) // && (g_BP.FP || g_BP.IFC || (ipu1dma.chcr.STR && ipu1dma.qwc > 0)))
IPUWorker();
}
/////////////////////////////////////////////////////////
@ -194,7 +195,7 @@ __fi u32 ipuRead32(u32 mem)
pxAssert((mem & ~0xff) == 0x10002000);
mem &= 0xff; // ipu repeats every 0x100
//IPUProcessInterrupt();
IPUProcessInterrupt();
switch (mem)
{
@ -236,7 +237,7 @@ __fi u64 ipuRead64(u32 mem)
pxAssert((mem & ~0xff) == 0x10002000);
mem &= 0xff; // ipu repeats every 0x100
//IPUProcessInterrupt();
IPUProcessInterrupt();
switch (mem)
{
@ -286,13 +287,12 @@ __fi bool ipuWrite32(u32 mem, u32 value)
pxAssert((mem & ~0xfff) == 0x10002000);
mem &= 0xfff;
IPUProcessInterrupt();
switch (mem)
{
ipucase(IPU_CMD): // IPU_CMD
IPU_LOG("write32: IPU_CMD=0x%08X", value);
IPUCMD_WRITE(value);
IPUProcessInterrupt();
return false;
ipucase(IPU_CTRL): // IPU_CTRL
@ -323,13 +323,12 @@ __fi bool ipuWrite64(u32 mem, u64 value)
pxAssert((mem & ~0xfff) == 0x10002000);
mem &= 0xfff;
IPUProcessInterrupt();
switch (mem)
{
ipucase(IPU_CMD):
IPU_LOG("write64: IPU_CMD=0x%08X", value);
IPUCMD_WRITE((u32)value);
IPUProcessInterrupt();
return false;
}
@ -352,72 +351,56 @@ static void ipuBCLR(u32 val)
IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP);
}
static bool ipuIDEC(u32 val, bool resume)
static __ri void ipuIDEC(tIPU_CMD_IDEC idec)
{
tIPU_CMD_IDEC idec(val);
if (!resume)
{
idec.log();
g_BP.Advance(idec.FB);
idec.log();
//from IPU_CTRL
ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;)
ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;)
decoder.coding_type = ipuRegs.ctrl.PCT;
decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST;
decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS;
decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
decoder.coding_type = ipuRegs.ctrl.PCT;
decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST;
decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS;
decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
//from IDEC value
decoder.quantizer_scale = idec.QSC;
decoder.frame_pred_frame_dct= !idec.DTD;
decoder.sgn = idec.SGN;
decoder.dte = idec.DTE;
decoder.ofm = idec.OFM;
decoder.quantizer_scale = idec.QSC;
decoder.frame_pred_frame_dct= !idec.DTD;
decoder.sgn = idec.SGN;
decoder.dte = idec.DTE;
decoder.ofm = idec.OFM;
//other stuff
decoder.dcr = 1; // resets DC prediction value
}
return mpeg2sliceIDEC();
decoder.dcr = 1; // resets DC prediction value
}
static int s_bdec = 0;
static __fi bool ipuBDEC(u32 val, bool resume)
static __ri void ipuBDEC(tIPU_CMD_BDEC bdec)
{
tIPU_CMD_BDEC bdec(val);
bdec.log(s_bdec);
if (IsDebugBuild) s_bdec++;
if (!resume)
{
bdec.log(s_bdec);
if (IsDebugBuild) s_bdec++;
g_BP.Advance(bdec.FB);
decoder.coding_type = I_TYPE;
decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST;
decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS;
decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
decoder.coding_type = I_TYPE;
decoder.mpeg1 = ipuRegs.ctrl.MP1;
decoder.q_scale_type = ipuRegs.ctrl.QST;
decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
decoder.scantype = ipuRegs.ctrl.AS;
decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
//from BDEC value
decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
decoder.dcr = bdec.DCR;
decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
decoder.dcr = bdec.DCR;
decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
memzero_sse_a(decoder.mb8);
memzero_sse_a(decoder.mb16);
}
return mpeg2_slice();
memzero_sse_a(decoder.mb8);
memzero_sse_a(decoder.mb16);
}
static bool __fastcall ipuVDEC(u32 val)
static __fi bool ipuVDEC(u32 val)
{
switch (ipu_cmd.pos[0])
{
@ -483,7 +466,7 @@ static bool __fastcall ipuVDEC(u32 val)
return false;
}
static __fi bool ipuFDEC(u32 val)
static __ri bool ipuFDEC(u32 val)
{
if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false;
@ -839,206 +822,154 @@ u8 getBits8(u8 *address, bool advance)
// --------------------------------------------------------------------------------------
// IPU Worker / Dispatcher
// --------------------------------------------------------------------------------------
void IPUCMD_WRITE(u32 val)
// When a command is written, we set some various busy flags and clear some other junk.
// The actual decoding will be handled by IPUworker.
__fi void IPUCMD_WRITE(u32 val)
{
// don't process anything if currently busy
if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread
//if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread
ipuRegs.ctrl.ECD = 0;
ipuRegs.ctrl.SCD = 0; //clear ECD/SCD
ipuRegs.ctrl.SCD = 0;
ipu_cmd.clear();
ipu_cmd.current = val;
switch (val >> 28)
switch (ipu_cmd.CMD)
{
// BCLR and SETTH require no data so they always execute inline:
case SCE_IPU_BCLR:
ipuBCLR(val);
hwIntcIrq(INTC_IPU); //DMAC_TO_IPU
ipuRegs.ctrl.BUSY = 0;
return;
case SCE_IPU_VDEC:
g_BP.Advance(val & 0x3F);
// check if enough data in queue
if (ipuVDEC(val)) return;
ipuRegs.cmd.BUSY = 0x80000000;
ipuRegs.topbusy = 0x80000000;
break;
case SCE_IPU_FDEC:
IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x",
val & 0x3f, g_BP.IFC, (int)g_BP.BP, ipu1dma.chcr._u32);
g_BP.Advance(val & 0x3F);
if (ipuFDEC(val)) return;
ipuRegs.cmd.BUSY = 0x80000000;
ipuRegs.topbusy = 0x80000000;
break;
case SCE_IPU_SETTH:
ipuSETTH(val);
hwIntcIrq(INTC_IPU);
ipuRegs.ctrl.BUSY = 0;
return;
case SCE_IPU_IDEC:
g_BP.Advance(val & 0x3F);
ipuIDEC(val);
ipuRegs.SetTopBusy();
break;
case SCE_IPU_BDEC:
g_BP.Advance(val & 0x3F);
ipuBDEC(val);
ipuRegs.SetTopBusy();
break;
case SCE_IPU_VDEC:
g_BP.Advance(val & 0x3F);
ipuRegs.SetDataBusy();
break;
case SCE_IPU_FDEC:
IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x",
val & 0x3f, g_BP.IFC, g_BP.BP, ipu1dma.chcr._u32);
g_BP.Advance(val & 0x3F);
ipuRegs.SetDataBusy();
break;
case SCE_IPU_SETIQ:
IPU_LOG("SETIQ command.");
if (val & 0x3f) IPU_LOG("Skip %d bits.", val & 0x3f);
g_BP.Advance(val & 0x3F);
if (ipuSETIQ(val)) return;
break;
case SCE_IPU_SETVQ:
if (ipuSETVQ(val)) return;
break;
case SCE_IPU_CSC:
ipu_cmd.pos[1] = 0;
ipu_cmd.index = 0;
if (ipuCSC(val))
{
IPU_INT0_FROM();
return;
}
break;
case SCE_IPU_PACK:
ipu_cmd.pos[1] = 0;
ipu_cmd.index = 0;
if (ipuPACK(val)) return;
break;
case SCE_IPU_IDEC:
if (ipuIDEC(val, false))
{
// idec done, ipu0 done too
IPU_INT0_FROM();
return;
}
ipuRegs.topbusy = 0x80000000;
break;
case SCE_IPU_BDEC:
if (ipuBDEC(val, false))
{
IPU_INT0_FROM();
if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
}
else
{
ipuRegs.topbusy = 0x80000000;
}
break;
jNO_DEFAULT;
}
// have to resort to the thread
ipuRegs.ctrl.BUSY = 1;
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
//if(!ipu1dma.chcr.STR) hwIntcIrq(INTC_IPU);
}
void IPUWorker()
__noinline void IPUWorker()
{
pxAssert(ipuRegs.ctrl.BUSY);
switch (ipu_cmd.CMD)
{
case SCE_IPU_VDEC:
if (!ipuVDEC(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
ipuRegs.cmd.BUSY = 0;
ipuRegs.topbusy = 0;
break;
case SCE_IPU_FDEC:
if (!ipuFDEC(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
ipuRegs.cmd.BUSY = 0;
ipuRegs.topbusy = 0;
break;
case SCE_IPU_SETIQ:
if (!ipuSETIQ(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
break;
case SCE_IPU_SETVQ:
if (!ipuSETVQ(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
break;
case SCE_IPU_CSC:
if (!ipuCSC(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
IPU_INT0_FROM();
break;
case SCE_IPU_PACK:
if (!ipuPACK(ipu_cmd.current))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
break;
// These are unreachable (BUSY will always be 0 for them)
//case SCE_IPU_BCLR:
//case SCE_IPU_SETTH:
//break;
case SCE_IPU_IDEC:
if (!ipuIDEC(ipu_cmd.current, true))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
if (!mpeg2sliceIDEC()) return;
ipuRegs.ctrl.OFC = 0;
ipuRegs.ctrl.BUSY = 0;
//ipuRegs.ctrl.OFC = 0;
ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0;
ipu_cmd.current = 0xffffffff;
// CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
IPU_INT0_FROM();
//IPU_INT0_FROM();
break;
case SCE_IPU_BDEC:
if (!ipuBDEC(ipu_cmd.current, true))
{
if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
}
if (!mpeg2_slice()) return;
ipuRegs.ctrl.BUSY = 0;
ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0;
ipu_cmd.current = 0xffffffff;
IPU_INT0_FROM();
if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
default:
Console.WriteLn("Unknown IPU command: %08x", ipu_cmd.current);
//if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
break;
case SCE_IPU_VDEC:
if (!ipuVDEC(ipu_cmd.current)) return;
ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0;
break;
case SCE_IPU_FDEC:
if (!ipuFDEC(ipu_cmd.current)) return;
ipuRegs.topbusy = 0;
ipuRegs.cmd.BUSY = 0;
break;
case SCE_IPU_SETIQ:
if (!ipuSETIQ(ipu_cmd.current)) return;
break;
case SCE_IPU_SETVQ:
if (!ipuSETVQ(ipu_cmd.current)) return;
break;
case SCE_IPU_CSC:
if (!ipuCSC(ipu_cmd.current)) return;
break;
case SCE_IPU_PACK:
if (!ipuPACK(ipu_cmd.current)) return;
break;
jNO_DEFAULT
}
// success
ipuRegs.ctrl.BUSY = 0;
ipu_cmd.current = 0xffffffff;
hwIntcIrq(INTC_IPU);
}

View File

@ -33,6 +33,11 @@ struct tIPU_CMD
{
u32 DATA;
u32 BUSY;
void SetBusy(bool busy=true)
{
BUSY = busy ? 0x80000000 : 0;
}
};
union tIPU_CTRL {
@ -223,44 +228,60 @@ enum SCE_IPU
};
struct IPUregisters {
tIPU_CMD cmd;
u32 dummy0[2];
tIPU_CTRL ctrl;
u32 dummy1[3];
u32 ipubp;
u32 dummy2[3];
u32 top;
u32 topbusy;
u32 dummy3[2];
tIPU_CMD cmd;
u32 dummy0[2];
tIPU_CTRL ctrl;
u32 dummy1[3];
u32 ipubp;
u32 dummy2[3];
u32 top;
u32 topbusy;
u32 dummy3[2];
void SetTopBusy()
{
topbusy = 0x80000000;
}
void SetDataBusy()
{
cmd.BUSY = 0x80000000;
topbusy = 0x80000000;
}
};
struct tIPU_cmd
union tIPU_cmd
{
int index;
int pos[6];
union {
struct {
u32 OPTION : 28;
u32 CMD : 4;
};
u32 current;
};
void clear()
struct
{
memzero(pos);
index = 0;
current = 0xffffffff;
}
int index;
int pos[6];
union {
struct {
u32 OPTION : 28;
u32 CMD : 4;
};
u32 current;
};
};
u128 _u128[2];
void clear();
wxString desc() const
{
return wxsFormat(L"Ipu cmd: index = 0x%x, current = 0x%x, pos[0] = 0x%x, pos[1] = 0x%x",
return pxsFmt(L"Ipu cmd: index = 0x%x, current = 0x%x, pos[0] = 0x%x, pos[1] = 0x%x",
index, current, pos[0], pos[1]);
}
};
static IPUregisters& ipuRegs = (IPUregisters&)eeHw[0x2000];
extern tIPU_cmd ipu_cmd;
extern __aligned16 tIPU_cmd ipu_cmd;
extern int coded_block_pattern;
extern int ipuInit();

View File

@ -381,9 +381,7 @@ __fi void dmaIPU0() // fromIPU
hwDmacIrq(DMAC_FROM_IPU);
}
//IPUProcessInterrupt();
extern void IPUWorker();
if (ipuRegs.ctrl.BUSY) IPUWorker();
IPUProcessInterrupt();
}
__fi void dmaIPU1() // toIPU

View File

@ -688,7 +688,7 @@ void __fi finishmpeg2sliceIDEC()
coded_block_pattern = decoder.coded_block_pattern;
}
bool mpeg2sliceIDEC()
__fi bool mpeg2sliceIDEC()
{
u16 code;
@ -922,7 +922,7 @@ finish_idec:
return true;
}
bool mpeg2_slice()
__fi bool mpeg2_slice()
{
int DCT_offset, DCT_stride;

View File

@ -37,7 +37,7 @@ __noinline void memzero_sse_a( T& dest )
float (*destxmm)[4] = (float(*)[4])&dest;
#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[idx][0], zeroreg)
#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[idx-1][0], zeroreg)
switch( MZFqwc & 0x07 )
{