diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index e0c2829752..754b6e5a83 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -26,17 +26,10 @@ #include "Vif_Dma.h" #include -static __fi void IPU_INT0_FROM() -{ - if (ipu0dma.qwc > 0 && ipu0dma.chcr.STR) ipu0Interrupt(); -} - -tIPU_cmd ipu_cmd; - -void ReorderBitstream(); - // the BP doesn't advance and returns -1 if there is no data to be read +__aligned16 tIPU_cmd ipu_cmd; __aligned16 tIPU_BP g_BP; +__aligned16 decoder_t decoder; void IPUWorker(); @@ -53,11 +46,19 @@ int coded_block_pattern = 0; u8 indx4[16*16/2]; -__aligned16 decoder_t decoder; + + +void tIPU_cmd::clear() +{ + memzero_sse_a(*this); + //memzero(*this); + current = 0xffffffff; +} __fi void IPUProcessInterrupt() { - if (ipuRegs.ctrl.BUSY && g_BP.IFC) IPUWorker(); + if (ipuRegs.ctrl.BUSY) // && (g_BP.FP || g_BP.IFC || (ipu1dma.chcr.STR && ipu1dma.qwc > 0))) + IPUWorker(); } ///////////////////////////////////////////////////////// @@ -194,7 +195,7 @@ __fi u32 ipuRead32(u32 mem) pxAssert((mem & ~0xff) == 0x10002000); mem &= 0xff; // ipu repeats every 0x100 - //IPUProcessInterrupt(); + IPUProcessInterrupt(); switch (mem) { @@ -236,7 +237,7 @@ __fi u64 ipuRead64(u32 mem) pxAssert((mem & ~0xff) == 0x10002000); mem &= 0xff; // ipu repeats every 0x100 - //IPUProcessInterrupt(); + IPUProcessInterrupt(); switch (mem) { @@ -286,13 +287,12 @@ __fi bool ipuWrite32(u32 mem, u32 value) pxAssert((mem & ~0xfff) == 0x10002000); mem &= 0xfff; - IPUProcessInterrupt(); - switch (mem) { ipucase(IPU_CMD): // IPU_CMD IPU_LOG("write32: IPU_CMD=0x%08X", value); IPUCMD_WRITE(value); + IPUProcessInterrupt(); return false; ipucase(IPU_CTRL): // IPU_CTRL @@ -323,13 +323,12 @@ __fi bool ipuWrite64(u32 mem, u64 value) pxAssert((mem & ~0xfff) == 0x10002000); mem &= 0xfff; - IPUProcessInterrupt(); - switch (mem) { ipucase(IPU_CMD): IPU_LOG("write64: IPU_CMD=0x%08X", value); IPUCMD_WRITE((u32)value); + IPUProcessInterrupt(); return false; } @@ -352,72 +351,56 @@ static void ipuBCLR(u32 val) IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP); } -static bool ipuIDEC(u32 val, bool resume) +static __ri void ipuIDEC(tIPU_CMD_IDEC idec) { - tIPU_CMD_IDEC idec(val); - - if (!resume) - { - idec.log(); - g_BP.Advance(idec.FB); + idec.log(); //from IPU_CTRL - ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;) + ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;) - decoder.coding_type = ipuRegs.ctrl.PCT; - decoder.mpeg1 = ipuRegs.ctrl.MP1; - decoder.q_scale_type = ipuRegs.ctrl.QST; - decoder.intra_vlc_format = ipuRegs.ctrl.IVF; - decoder.scantype = ipuRegs.ctrl.AS; - decoder.intra_dc_precision = ipuRegs.ctrl.IDP; + decoder.coding_type = ipuRegs.ctrl.PCT; + decoder.mpeg1 = ipuRegs.ctrl.MP1; + decoder.q_scale_type = ipuRegs.ctrl.QST; + decoder.intra_vlc_format = ipuRegs.ctrl.IVF; + decoder.scantype = ipuRegs.ctrl.AS; + decoder.intra_dc_precision = ipuRegs.ctrl.IDP; //from IDEC value - decoder.quantizer_scale = idec.QSC; - decoder.frame_pred_frame_dct= !idec.DTD; - decoder.sgn = idec.SGN; - decoder.dte = idec.DTE; - decoder.ofm = idec.OFM; + decoder.quantizer_scale = idec.QSC; + decoder.frame_pred_frame_dct= !idec.DTD; + decoder.sgn = idec.SGN; + decoder.dte = idec.DTE; + decoder.ofm = idec.OFM; //other stuff - decoder.dcr = 1; // resets DC prediction value - } - - return mpeg2sliceIDEC(); + decoder.dcr = 1; // resets DC prediction value } static int s_bdec = 0; -static __fi bool ipuBDEC(u32 val, bool resume) +static __ri void ipuBDEC(tIPU_CMD_BDEC bdec) { - tIPU_CMD_BDEC bdec(val); + bdec.log(s_bdec); + if (IsDebugBuild) s_bdec++; - if (!resume) - { - bdec.log(s_bdec); - if (IsDebugBuild) s_bdec++; - - g_BP.Advance(bdec.FB); - decoder.coding_type = I_TYPE; - decoder.mpeg1 = ipuRegs.ctrl.MP1; - decoder.q_scale_type = ipuRegs.ctrl.QST; - decoder.intra_vlc_format = ipuRegs.ctrl.IVF; - decoder.scantype = ipuRegs.ctrl.AS; - decoder.intra_dc_precision = ipuRegs.ctrl.IDP; + decoder.coding_type = I_TYPE; + decoder.mpeg1 = ipuRegs.ctrl.MP1; + decoder.q_scale_type = ipuRegs.ctrl.QST; + decoder.intra_vlc_format = ipuRegs.ctrl.IVF; + decoder.scantype = ipuRegs.ctrl.AS; + decoder.intra_dc_precision = ipuRegs.ctrl.IDP; //from BDEC value - decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1; - decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0; - decoder.dcr = bdec.DCR; - decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN; + decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1; + decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0; + decoder.dcr = bdec.DCR; + decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN; - memzero_sse_a(decoder.mb8); - memzero_sse_a(decoder.mb16); - } - - return mpeg2_slice(); + memzero_sse_a(decoder.mb8); + memzero_sse_a(decoder.mb16); } -static bool __fastcall ipuVDEC(u32 val) +static __fi bool ipuVDEC(u32 val) { switch (ipu_cmd.pos[0]) { @@ -483,7 +466,7 @@ static bool __fastcall ipuVDEC(u32 val) return false; } -static __fi bool ipuFDEC(u32 val) +static __ri bool ipuFDEC(u32 val) { if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false; @@ -839,206 +822,154 @@ u8 getBits8(u8 *address, bool advance) // -------------------------------------------------------------------------------------- // IPU Worker / Dispatcher // -------------------------------------------------------------------------------------- -void IPUCMD_WRITE(u32 val) + +// When a command is written, we set some various busy flags and clear some other junk. +// The actual decoding will be handled by IPUworker. +__fi void IPUCMD_WRITE(u32 val) { // don't process anything if currently busy - if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread + //if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread ipuRegs.ctrl.ECD = 0; - ipuRegs.ctrl.SCD = 0; //clear ECD/SCD + ipuRegs.ctrl.SCD = 0; ipu_cmd.clear(); ipu_cmd.current = val; - switch (val >> 28) + switch (ipu_cmd.CMD) { + // BCLR and SETTH require no data so they always execute inline: + case SCE_IPU_BCLR: ipuBCLR(val); hwIntcIrq(INTC_IPU); //DMAC_TO_IPU + ipuRegs.ctrl.BUSY = 0; return; - case SCE_IPU_VDEC: - - g_BP.Advance(val & 0x3F); - - // check if enough data in queue - if (ipuVDEC(val)) return; - - ipuRegs.cmd.BUSY = 0x80000000; - ipuRegs.topbusy = 0x80000000; - break; - - case SCE_IPU_FDEC: - IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x", - val & 0x3f, g_BP.IFC, (int)g_BP.BP, ipu1dma.chcr._u32); - - g_BP.Advance(val & 0x3F); - - if (ipuFDEC(val)) return; - ipuRegs.cmd.BUSY = 0x80000000; - ipuRegs.topbusy = 0x80000000; - break; - case SCE_IPU_SETTH: ipuSETTH(val); hwIntcIrq(INTC_IPU); + ipuRegs.ctrl.BUSY = 0; return; + + + case SCE_IPU_IDEC: + g_BP.Advance(val & 0x3F); + ipuIDEC(val); + ipuRegs.SetTopBusy(); + break; + + case SCE_IPU_BDEC: + g_BP.Advance(val & 0x3F); + ipuBDEC(val); + ipuRegs.SetTopBusy(); + break; + + case SCE_IPU_VDEC: + g_BP.Advance(val & 0x3F); + ipuRegs.SetDataBusy(); + break; + + case SCE_IPU_FDEC: + IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, CHCR 0x%x", + val & 0x3f, g_BP.IFC, g_BP.BP, ipu1dma.chcr._u32); + + g_BP.Advance(val & 0x3F); + ipuRegs.SetDataBusy(); + break; + case SCE_IPU_SETIQ: IPU_LOG("SETIQ command."); - if (val & 0x3f) IPU_LOG("Skip %d bits.", val & 0x3f); g_BP.Advance(val & 0x3F); - if (ipuSETIQ(val)) return; break; case SCE_IPU_SETVQ: - if (ipuSETVQ(val)) return; break; case SCE_IPU_CSC: ipu_cmd.pos[1] = 0; ipu_cmd.index = 0; - - if (ipuCSC(val)) - { - IPU_INT0_FROM(); - return; - } break; case SCE_IPU_PACK: ipu_cmd.pos[1] = 0; ipu_cmd.index = 0; - if (ipuPACK(val)) return; break; - case SCE_IPU_IDEC: - if (ipuIDEC(val, false)) - { - // idec done, ipu0 done too - IPU_INT0_FROM(); - return; - } - - ipuRegs.topbusy = 0x80000000; - break; - - case SCE_IPU_BDEC: - if (ipuBDEC(val, false)) - { - IPU_INT0_FROM(); - if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU); - return; - } - else - { - ipuRegs.topbusy = 0x80000000; - } - break; + jNO_DEFAULT; } - // have to resort to the thread ipuRegs.ctrl.BUSY = 1; - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); + + //if(!ipu1dma.chcr.STR) hwIntcIrq(INTC_IPU); } -void IPUWorker() +__noinline void IPUWorker() { pxAssert(ipuRegs.ctrl.BUSY); switch (ipu_cmd.CMD) { - case SCE_IPU_VDEC: - if (!ipuVDEC(ipu_cmd.current)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } - ipuRegs.cmd.BUSY = 0; - ipuRegs.topbusy = 0; - break; - - case SCE_IPU_FDEC: - if (!ipuFDEC(ipu_cmd.current)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } - ipuRegs.cmd.BUSY = 0; - ipuRegs.topbusy = 0; - break; - - case SCE_IPU_SETIQ: - if (!ipuSETIQ(ipu_cmd.current)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } - break; - - case SCE_IPU_SETVQ: - if (!ipuSETVQ(ipu_cmd.current)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } - break; - - case SCE_IPU_CSC: - if (!ipuCSC(ipu_cmd.current)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } - IPU_INT0_FROM(); - break; - - case SCE_IPU_PACK: - if (!ipuPACK(ipu_cmd.current)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } - break; + // These are unreachable (BUSY will always be 0 for them) + //case SCE_IPU_BCLR: + //case SCE_IPU_SETTH: + //break; case SCE_IPU_IDEC: - if (!ipuIDEC(ipu_cmd.current, true)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } + if (!mpeg2sliceIDEC()) return; - ipuRegs.ctrl.OFC = 0; - ipuRegs.ctrl.BUSY = 0; + //ipuRegs.ctrl.OFC = 0; ipuRegs.topbusy = 0; ipuRegs.cmd.BUSY = 0; - ipu_cmd.current = 0xffffffff; // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it - IPU_INT0_FROM(); + //IPU_INT0_FROM(); break; case SCE_IPU_BDEC: - if (!ipuBDEC(ipu_cmd.current, true)) - { - if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU); - return; - } + if (!mpeg2_slice()) return; - ipuRegs.ctrl.BUSY = 0; ipuRegs.topbusy = 0; ipuRegs.cmd.BUSY = 0; - ipu_cmd.current = 0xffffffff; - IPU_INT0_FROM(); - if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU); - return; - - default: - Console.WriteLn("Unknown IPU command: %08x", ipu_cmd.current); + //if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU); break; + + case SCE_IPU_VDEC: + if (!ipuVDEC(ipu_cmd.current)) return; + + ipuRegs.topbusy = 0; + ipuRegs.cmd.BUSY = 0; + break; + + case SCE_IPU_FDEC: + if (!ipuFDEC(ipu_cmd.current)) return; + + ipuRegs.topbusy = 0; + ipuRegs.cmd.BUSY = 0; + break; + + case SCE_IPU_SETIQ: + if (!ipuSETIQ(ipu_cmd.current)) return; + break; + + case SCE_IPU_SETVQ: + if (!ipuSETVQ(ipu_cmd.current)) return; + break; + + case SCE_IPU_CSC: + if (!ipuCSC(ipu_cmd.current)) return; + break; + + case SCE_IPU_PACK: + if (!ipuPACK(ipu_cmd.current)) return; + break; + + jNO_DEFAULT } // success ipuRegs.ctrl.BUSY = 0; ipu_cmd.current = 0xffffffff; + hwIntcIrq(INTC_IPU); } diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index bba968e31c..4cf87733c1 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -33,6 +33,11 @@ struct tIPU_CMD { u32 DATA; u32 BUSY; + + void SetBusy(bool busy=true) + { + BUSY = busy ? 0x80000000 : 0; + } }; union tIPU_CTRL { @@ -223,44 +228,60 @@ enum SCE_IPU }; struct IPUregisters { - tIPU_CMD cmd; - u32 dummy0[2]; - tIPU_CTRL ctrl; - u32 dummy1[3]; - u32 ipubp; - u32 dummy2[3]; - u32 top; - u32 topbusy; - u32 dummy3[2]; + tIPU_CMD cmd; + u32 dummy0[2]; + + tIPU_CTRL ctrl; + u32 dummy1[3]; + + u32 ipubp; + u32 dummy2[3]; + + u32 top; + u32 topbusy; + u32 dummy3[2]; + + void SetTopBusy() + { + topbusy = 0x80000000; + } + + void SetDataBusy() + { + cmd.BUSY = 0x80000000; + topbusy = 0x80000000; + } + }; -struct tIPU_cmd +union tIPU_cmd { - int index; - int pos[6]; - union { - struct { - u32 OPTION : 28; - u32 CMD : 4; - }; - u32 current; - }; - void clear() + struct { - memzero(pos); - index = 0; - current = 0xffffffff; - } + int index; + int pos[6]; + union { + struct { + u32 OPTION : 28; + u32 CMD : 4; + }; + u32 current; + }; + }; + + u128 _u128[2]; + + void clear(); wxString desc() const { - return wxsFormat(L"Ipu cmd: index = 0x%x, current = 0x%x, pos[0] = 0x%x, pos[1] = 0x%x", + return pxsFmt(L"Ipu cmd: index = 0x%x, current = 0x%x, pos[0] = 0x%x, pos[1] = 0x%x", index, current, pos[0], pos[1]); } }; static IPUregisters& ipuRegs = (IPUregisters&)eeHw[0x2000]; -extern tIPU_cmd ipu_cmd; +extern __aligned16 tIPU_cmd ipu_cmd; extern int coded_block_pattern; extern int ipuInit(); diff --git a/pcsx2/IPU/IPUdma.cpp b/pcsx2/IPU/IPUdma.cpp index 26e9e80a6a..857d3fc922 100644 --- a/pcsx2/IPU/IPUdma.cpp +++ b/pcsx2/IPU/IPUdma.cpp @@ -381,9 +381,7 @@ __fi void dmaIPU0() // fromIPU hwDmacIrq(DMAC_FROM_IPU); } - //IPUProcessInterrupt(); - extern void IPUWorker(); - if (ipuRegs.ctrl.BUSY) IPUWorker(); + IPUProcessInterrupt(); } __fi void dmaIPU1() // toIPU diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp b/pcsx2/IPU/mpeg2lib/Mpeg.cpp index bdef4b2d49..b00344288e 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp +++ b/pcsx2/IPU/mpeg2lib/Mpeg.cpp @@ -688,7 +688,7 @@ void __fi finishmpeg2sliceIDEC() coded_block_pattern = decoder.coded_block_pattern; } -bool mpeg2sliceIDEC() +__fi bool mpeg2sliceIDEC() { u16 code; @@ -922,7 +922,7 @@ finish_idec: return true; } -bool mpeg2_slice() +__fi bool mpeg2_slice() { int DCT_offset, DCT_stride; diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h index 5ea46631e7..85a7a68a4e 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.h +++ b/pcsx2/IPU/mpeg2lib/Mpeg.h @@ -37,7 +37,7 @@ __noinline void memzero_sse_a( T& dest ) float (*destxmm)[4] = (float(*)[4])&dest; -#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[idx][0], zeroreg) +#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[idx-1][0], zeroreg) switch( MZFqwc & 0x07 ) {