diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 10593fbd2c..7b422b6c8d 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -36,12 +36,9 @@ // IPU Inline'd IRQs : Calls the IPU interrupt handlers directly instead of // feeding them through the EE's branch test. (see IPU.h for details) - - - -static tIPU_DMA g_nDMATransfer(0); -static tIPU_cmd ipu_cmd; -static IPUStatus IPU1Status; +tIPU_DMA g_nDMATransfer(0); +tIPU_cmd ipu_cmd; +IPUStatus IPU1Status; // FIXME - g_nIPU0Data and Pointer are not saved in the savestate, which breaks savestates for some // FMVs at random (if they get saved during the half frame of a 30fps rate). The fix is complicated @@ -53,9 +50,6 @@ void ReorderBitstream(); // the BP doesn't advance and returns -1 if there is no data to be read tIPU_BP g_BP; -static coroutine_t s_routine; // used for executing BDEC/IDEC -static int s_RoutineDone = 0; -static u32 s_tempstack[0x4000]; // 64k void IPUWorker(); @@ -78,7 +72,7 @@ __aligned16 macroblock_rgb16 rgb16; u8 indx4[16*16/2]; bool mpeg2_inited = false; //mpeg2_idct_init() must be called only once u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'}; -decoder_t g_decoder; //static, only to place it in bss +decoder_t decoder; //static, only to place it in bss decoder_t tempdec; extern "C" @@ -98,14 +92,14 @@ __forceinline void IPUProcessInterrupt() void init_g_decoder() { //other stuff - g_decoder.intra_quantizer_matrix = (u8*)iq; - g_decoder.non_intra_quantizer_matrix = (u8*)niq; - g_decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P - g_decoder.mb8 = &mb8; - g_decoder.mb16 = &mb16; - g_decoder.rgb32 = &rgb32; - g_decoder.rgb16 = &rgb16; - g_decoder.stride = 16; + decoder.intra_quantizer_matrix = (u8*)iq; + decoder.non_intra_quantizer_matrix = (u8*)niq; + decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P + decoder.mb8 = &mb8; + decoder.mb16 = &mb16; + decoder.rgb32 = &rgb32; + decoder.rgb16 = &rgb16; + decoder.stride = 16; } void mpeg2_init() @@ -159,7 +153,7 @@ void ReportIPU() Console.WriteLn("vqclut = 0x%x.", vqclut); Console.WriteLn("s_thresh = 0x%x.", s_thresh); Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern); - Console.WriteLn("g_decoder = 0x%x.", g_decoder); + Console.WriteLn("g_decoder = 0x%x.", decoder); Console.WriteLn("mpeg2: scan_norm = 0x%x, alt = 0x%x.", mpeg2_scan_norm, mpeg2_scan_alt); Console.WriteLn(ipu_cmd.desc()); Console.WriteLn("_readbits = 0x%x. readbits - _readbits, which is also frozen, is 0x%x.", @@ -186,7 +180,7 @@ void SaveStateBase::ipuFreeze() Freeze(vqclut); Freeze(s_thresh); Freeze(coded_block_pattern); - Freeze(g_decoder); + Freeze(decoder); Freeze(mpeg2_scan_norm); Freeze(mpeg2_scan_alt); @@ -377,72 +371,67 @@ static void ipuBCLR(u32 val) IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP); } -static BOOL ipuIDEC(u32 val) +static BOOL ipuIDEC(u32 val, bool resume) { tIPU_CMD_IDEC idec(val); - idec.log(); - g_BP.BP += idec.FB;//skip FB bits - //from IPU_CTRL - ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;) - g_decoder.coding_type = ipuRegs->ctrl.PCT; - g_decoder.mpeg1 = ipuRegs->ctrl.MP1; - g_decoder.q_scale_type = ipuRegs->ctrl.QST; - g_decoder.intra_vlc_format = ipuRegs->ctrl.IVF; - g_decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm; - g_decoder.intra_dc_precision = ipuRegs->ctrl.IDP; + if (!resume) + { + idec.log(); + g_BP.BP += idec.FB;//skip FB bits + //from IPU_CTRL + ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;) + decoder.coding_type = ipuRegs->ctrl.PCT; + decoder.mpeg1 = ipuRegs->ctrl.MP1; + decoder.q_scale_type = ipuRegs->ctrl.QST; + decoder.intra_vlc_format = ipuRegs->ctrl.IVF; + decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm; + decoder.intra_dc_precision = ipuRegs->ctrl.IDP; - //from IDEC value - g_decoder.quantizer_scale = idec.QSC; - g_decoder.frame_pred_frame_dct = !idec.DTD; - g_decoder.sgn = idec.SGN; - g_decoder.dte = idec.DTE; - g_decoder.ofm = idec.OFM; + //from IDEC value + decoder.quantizer_scale = idec.QSC; + decoder.frame_pred_frame_dct = !idec.DTD; + decoder.sgn = idec.SGN; + decoder.dte = idec.DTE; + decoder.ofm = idec.OFM; - //other stuff - g_decoder.dcr = 1; // resets DC prediction value + //other stuff + decoder.dcr = 1; // resets DC prediction value + } - s_routine = so_create(mpeg2sliceIDEC, &s_RoutineDone, s_tempstack, sizeof(s_tempstack)); - pxAssert(s_routine != NULL); - so_call(s_routine); - if (s_RoutineDone) s_routine = NULL; - - return s_RoutineDone; + return mpeg2sliceIDEC(); } static int s_bdec = 0; -static __forceinline BOOL ipuBDEC(u32 val) +static __forceinline BOOL ipuBDEC(u32 val, bool resume) { tIPU_CMD_BDEC bdec(val); - bdec.log(s_bdec); - if (IsDebugBuild) s_bdec++; + if (!resume) + { + bdec.log(s_bdec); + if (IsDebugBuild) s_bdec++; - g_BP.BP += bdec.FB;//skip FB bits - g_decoder.coding_type = I_TYPE; - g_decoder.mpeg1 = ipuRegs->ctrl.MP1; - g_decoder.q_scale_type = ipuRegs->ctrl.QST; - g_decoder.intra_vlc_format = ipuRegs->ctrl.IVF; - g_decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm; - g_decoder.intra_dc_precision = ipuRegs->ctrl.IDP; + g_BP.BP += bdec.FB;//skip FB bits + decoder.coding_type = I_TYPE; + decoder.mpeg1 = ipuRegs->ctrl.MP1; + decoder.q_scale_type = ipuRegs->ctrl.QST; + decoder.intra_vlc_format = ipuRegs->ctrl.IVF; + decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm; + decoder.intra_dc_precision = ipuRegs->ctrl.IDP; - //from BDEC value - /* JayteeMaster: the quantizer (linear/non linear) depends on the q_scale_type */ - g_decoder.quantizer_scale = g_decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1; - g_decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0; - g_decoder.dcr = bdec.DCR; - g_decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN; + //from BDEC value + decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1; + decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0; + decoder.dcr = bdec.DCR; + decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN; - memzero(mb8); - memzero(mb16); + memzero(mb8); + memzero(mb16); + } - s_routine = so_create(mpeg2_slice, &s_RoutineDone, s_tempstack, sizeof(s_tempstack)); - pxAssert(s_routine != NULL); - so_call(s_routine); - - if (s_RoutineDone) s_routine = NULL; - return s_RoutineDone; + return mpeg2_slice(); } static BOOL __fastcall ipuVDEC(u32 val) @@ -451,34 +440,34 @@ static BOOL __fastcall ipuVDEC(u32 val) { case 0: ipuRegs->cmd.DATA = 0; - if (!getBits32((u8*)&g_decoder.bitstream_buf, 0)) return FALSE; + if (!getBits32((u8*)&decoder.bitstream_buf, 0)) return FALSE; - g_decoder.bitstream_bits = -16; - BigEndian(g_decoder.bitstream_buf, g_decoder.bitstream_buf); + decoder.bitstream_bits = -16; + BigEndian(decoder.bitstream_buf, decoder.bitstream_buf); switch ((val >> 26) & 3) { case 0://Macroblock Address Increment - g_decoder.mpeg1 = ipuRegs->ctrl.MP1; - ipuRegs->cmd.DATA = get_macroblock_address_increment(&g_decoder); + decoder.mpeg1 = ipuRegs->ctrl.MP1; + ipuRegs->cmd.DATA = get_macroblock_address_increment(); break; - case 1://Macroblock Type //known issues: no error detected - g_decoder.frame_pred_frame_dct = 1;//prevent DCT_TYPE_INTERLACED - g_decoder.coding_type = ipuRegs->ctrl.PCT; - ipuRegs->cmd.DATA = get_macroblock_modes(&g_decoder); + case 1://Macroblock Type + decoder.frame_pred_frame_dct = 1; + decoder.coding_type = ipuRegs->ctrl.PCT; + ipuRegs->cmd.DATA = get_macroblock_modes(); break; - case 2://Motion Code //known issues: no error detected - ipuRegs->cmd.DATA = get_motion_delta(&g_decoder, 0); + case 2://Motion Code + ipuRegs->cmd.DATA = get_motion_delta(0); break; case 3://DMVector - ipuRegs->cmd.DATA = get_dmv(&g_decoder); + ipuRegs->cmd.DATA = get_dmv(); break; } - g_BP.BP += (g_decoder.bitstream_bits + 16); + g_BP.BP += (int)decoder.bitstream_bits + 16; if ((int)g_BP.BP < 0) { @@ -486,9 +475,7 @@ static BOOL __fastcall ipuVDEC(u32 val) ReorderBitstream(); } - FillInternalBuffer(&g_BP.BP, 1, 0); - - ipuRegs->cmd.DATA = (ipuRegs->cmd.DATA & 0xFFFF) | ((g_decoder.bitstream_bits + 16) << 16); + ipuRegs->cmd.DATA = (ipuRegs->cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16); ipuRegs->ctrl.ECD = (ipuRegs->cmd.DATA == 0); case 1: @@ -529,7 +516,10 @@ static BOOL ipuSETIQ(u32 val) if ((val >> 27) & 1) { - ipu_cmd.pos[0] += getBits((u8*)niq + ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1); // 8*8*8 + for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++) + { + if (!getBits64((u8*)niq + 8 * ipu_cmd.pos[0], 1)) return FALSE; + } IPU_LOG("Read non-intra quantization matrix from IPU FIFO."); for (i = 0; i < 8; i++) @@ -541,7 +531,10 @@ static BOOL ipuSETIQ(u32 val) } else { - ipu_cmd.pos[0] += getBits((u8*)iq + 8 * ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1); + for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++) + { + if (!getBits64((u8*)iq + 8 * ipu_cmd.pos[0], 1)) return FALSE; + } IPU_LOG("Read intra quantization matrix from IPU FIFO."); for (i = 0; i < 8; i++) @@ -552,40 +545,40 @@ static BOOL ipuSETIQ(u32 val) } } - return ipu_cmd.pos[0] == 64; + return TRUE; } static BOOL ipuSETVQ(u32 val) { - ipu_cmd.pos[0] += getBits((u8*)vqclut + ipu_cmd.pos[0], 256 - 8 * ipu_cmd.pos[0], 1); // 16*2*8 - - if (ipu_cmd.pos[0] == 32) + for(;ipu_cmd.pos[0] < 4; ipu_cmd.pos[0]++) { - IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO."); - IPU_LOG( - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d " - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d" - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d " - "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d", - vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F, - vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F, - vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F, - vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F, - vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F, - vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F, - vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F, - vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F, - vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F, - vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F, - vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F, - vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F, - vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F, - vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F, - vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F, - vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F); + if (!getBits64((u8*)vqclut + 8 * ipu_cmd.pos[0], 1)) return FALSE; } - return ipu_cmd.pos[0] == 32; + IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO."); + IPU_LOG( + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d " + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d" + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d " + "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d", + vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F, + vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F, + vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F, + vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F, + vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F, + vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F, + vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F, + vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F, + vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F, + vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F, + vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F, + vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F, + vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F, + vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F, + vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F, + vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F); + + return TRUE; } // IPU Transfers are split into 8Qwords so we need to send ALL the data @@ -596,17 +589,14 @@ static BOOL __fastcall ipuCSC(u32 val) for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++) { - - if (ipu_cmd.pos[0] < 3072 / 8) + for(;ipu_cmd.pos[0] < 48; ipu_cmd.pos[0]++) { - ipu_cmd.pos[0] += getBits((u8*) & mb8 + ipu_cmd.pos[0], 3072 - 8 * ipu_cmd.pos[0], 1); - - if (ipu_cmd.pos[0] < 3072 / 8) return FALSE; - - ipu_csc(&mb8, &rgb32, 0); - if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE); + if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE; } + ipu_csc(&mb8, &rgb32, 0); + if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE); + if (csc.OFM) { while (ipu_cmd.pos[1] < 32) @@ -641,18 +631,16 @@ static BOOL ipuPACK(u32 val) for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++) { - if (ipu_cmd.pos[0] < 512) + for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++) { - ipu_cmd.pos[0] += getBits((u8*) & mb8 + ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1); - - if (ipu_cmd.pos[0] < 64) return FALSE; - - ipu_csc(&mb8, &rgb32, 0); - ipu_dither(&rgb32, &rgb16, csc.DTE); - - if (csc.OFM) ipu_vq(&rgb16, indx4); + if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE; } + ipu_csc(&mb8, &rgb32, 0); + ipu_dither(&rgb32, &rgb16, csc.DTE); + + if (csc.OFM) ipu_vq(&rgb16, indx4); + if (csc.OFM) { ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]); @@ -696,7 +684,7 @@ void IPUCMD_WRITE(u32 val) ipuRegs->ctrl.ECD = 0; ipuRegs->ctrl.SCD = 0; //clear ECD/SCD ipuRegs->cmd.DATA = val; - ipu_cmd.pos[0] = 0; + ipu_cmd.clear(); switch (ipuRegs->cmd.CMD) { @@ -759,29 +747,27 @@ void IPUCMD_WRITE(u32 val) break; case SCE_IPU_IDEC: - if (ipuIDEC(val)) + if (ipuIDEC(val, false)) { // idec done, ipu0 done too if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM(); return; } + ipuRegs->topbusy = 0x80000000; - // have to resort to the thread - ipu_cmd.current = val >> 28; - ipuRegs->ctrl.BUSY = 1; - return; + break; case SCE_IPU_BDEC: - if (ipuBDEC(val)) + if (ipuBDEC(val, false)) { if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM(); if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU); return; } - ipuRegs->topbusy = 0x80000000; - ipu_cmd.current = val >> 28; - ipuRegs->ctrl.BUSY = 1; - return; + else + { + ipuRegs->topbusy = 0x80000000; + } } // have to resort to the thread @@ -850,8 +836,7 @@ void IPUWorker() break; case SCE_IPU_IDEC: - so_call(s_routine); - if (!s_RoutineDone) + if (!ipuIDEC(ipuRegs->cmd.DATA, true)) { if(ipu1dma->chcr.STR == false) hwIntcIrq(INTC_IPU); return; @@ -865,12 +850,10 @@ void IPUWorker() // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM(); - s_routine = NULL; break; case SCE_IPU_BDEC: - so_call(s_routine); - if (!s_RoutineDone) + if (!ipuBDEC(ipuRegs->cmd.DATA, true)) { if(ipu1dma->chcr.STR == false) hwIntcIrq(INTC_IPU); return; @@ -882,7 +865,6 @@ void IPUWorker() ipu_cmd.current = 0xffffffff; if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM(); - s_routine = NULL; if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU); return; @@ -946,7 +928,7 @@ u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size) g_BP.FP = 1; } - if ((g_BP.FP < 2) && (*(int*)pointer + size) >= 128) + if ((g_BP.FP < 2) && ((*(int*)pointer + size) >= 128)) { if (ipu_fifo.in.read(next_readbits())) g_BP.FP += 1; } @@ -967,6 +949,83 @@ u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size) return (g_BP.FP >= 1) ? g_BP.FP * 128 - (*(int*)pointer) : 0; } +// whenever reading fractions of bytes. The low bits always come from the next byte +// while the high bits come from the current byte +u8 __fastcall getBits128(u8 *address, u32 advance) +{ + u64 mask2; + u128 mask; + u32 shift; + u8* readpos; + + // Check if the current BP has exceeded or reached the limit of 128 + if (FillInternalBuffer(&g_BP.BP, 1, 128) < 128) return 0; + + readpos = readbits + (int)g_BP.BP / 8; + + if (g_BP.BP & 7) + { + shift = g_BP.BP & 7; + mask2 = 0xff >> shift; + mask.lo = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56); + mask.hi = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56); + + u128 notMask; + u128 data = *(u128*)(readpos + 1); + notMask.lo = ~mask.lo & data.lo; + notMask.hi = ~mask.hi & data.hi; + notMask.lo >>= 8 - shift; + notMask.lo |= (notMask.hi & (0xFFFFFFFFFFFFFFFF >> (64 - shift))) << (64 - shift); + notMask.hi >>= 8 - shift; + + mask.hi = (((*(u128*)readpos).hi & mask.hi) << shift) | (((*(u128*)readpos).lo & mask.lo) >> (64 - shift)); + mask.lo = ((*(u128*)readpos).lo & mask.lo) << shift; + + notMask.lo |= mask.lo; + notMask.hi |= mask.hi; + *(u128*)address = notMask; + } + else + { + *(u128*)address = *(u128*)readpos; + } + + if (advance) g_BP.BP += 128; + + return 1; +} + +// whenever reading fractions of bytes. The low bits always come from the next byte +// while the high bits come from the current byte +u8 __fastcall getBits64(u8 *address, u32 advance) +{ + register u64 mask = 0; + int shift = 0; + u8* readpos; + + // Check if the current BP has exceeded or reached the limit of 128 + if (FillInternalBuffer(&g_BP.BP, 1, 64) < 64) return 0; + + readpos = readbits + (int)g_BP.BP / 8; + + if (g_BP.BP & 7) + { + shift = g_BP.BP & 7; + mask = (0xff >> shift); + mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); + + *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift); + } + else + { + *(u64*)address = *(u64*)readpos; + } + + if (advance) g_BP.BP += 64; + + return 1; +} + // whenever reading fractions of bytes. The low bits always come from the next byte // while the high bits come from the current byte u8 __fastcall getBits32(u8 *address, u32 advance) @@ -1053,102 +1112,6 @@ u8 __fastcall getBits8(u8 *address, u32 advance) return 1; } -int __fastcall getBits(u8 *address, u32 size, u32 advance) -{ - register u32 mask = 0, shift = 0, howmuch; - u8* oldbits, *oldaddr = address; - u32 pointer = 0, temp; - - // Check if the current BP has exceeded or reached the limit of 128 - if (FillInternalBuffer(&g_BP.BP, 1, 8) < 8) return 0; - - oldbits = readbits; - // Backup the current BP in case of VDEC/FDEC - pointer = g_BP.BP; - - if (pointer & 7) - { - address--; - while (size) - { - if (shift == 0) - { - *++address = 0; - shift = 8; - } - - temp = shift; // Lets not pass a register to min. - howmuch = min(min(8 - (pointer & 7), 128 - pointer), min(size, temp)); - - if (FillInternalBuffer(&pointer, advance, 8) < 8) - { - if (advance) g_BP.BP = pointer; - return address - oldaddr; - } - - mask = ((0xFF >> (pointer & 7)) << (8 - howmuch - (pointer & 7))) & 0xFF; - mask &= readbits[((pointer) >> 3)]; - mask >>= 8 - howmuch - (pointer & 7); - pointer += howmuch; - size -= howmuch; - shift -= howmuch; - *address |= mask << shift; - } - ++address; - } - else - { - u8* readmem; - while (size) - { - if (FillInternalBuffer(&pointer, advance, 8) < 8) - { - if (advance) g_BP.BP = pointer; - return address -oldaddr; - } - - howmuch = min(128 - pointer, size); - size -= howmuch; - - readmem = readbits + (pointer >> 3); - pointer += howmuch; - howmuch >>= 3; - - while (howmuch >= 4) - { - *(u32*)address = *(u32*)readmem; - howmuch -= 4; - address += 4; - readmem += 4; - } - - switch (howmuch) - { - case 3: - address[2] = readmem[2]; - case 2: - address[1] = readmem[1]; - case 1: - address[0] = readmem[0]; - case 0: - break; - - jNO_DEFAULT - } - - address += howmuch; - } - } - - // If not advance then reset the Reading buffer value - if (advance) - g_BP.BP = pointer; - else - readbits = oldbits; // restore the last pointer - - return address - oldaddr; -} - ///////////////////// CORE FUNCTIONS ///////////////// void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V, const int Src_BpS, const int Width, const int Height); @@ -1244,7 +1207,7 @@ static __forceinline void ipuDmacSrcChain() { case TAG_REFE: // refe //if(IPU1Status.InProgress == false) ipu1dma->tadr += 16; - if(IPU1Status.DMAFinished == false) IPU1Status.DMAFinished = true; + IPU1Status.DMAFinished = true; break; case TAG_CNT: // cnt // Set the taddr to the next tag @@ -1264,7 +1227,7 @@ static __forceinline void ipuDmacSrcChain() case TAG_END: // end ipu1dma->tadr = ipu1dma->madr; - if(IPU1Status.DMAFinished == false) IPU1Status.DMAFinished = true; + IPU1Status.DMAFinished = true; break; } } @@ -1300,7 +1263,6 @@ static __forceinline int IPU1chain() { if (ipu1dma->qwc > 0 && IPU1Status.InProgress == true) { - int qwc = ipu1dma->qwc; u32 *pMem; @@ -1308,7 +1270,8 @@ static __forceinline int IPU1chain() { if (pMem == NULL) { - Console.Error("ipu1dma NULL!"); return totalqwc; + Console.Error("ipu1dma NULL!"); + return totalqwc; } //Write our data to the fifo @@ -1484,7 +1447,6 @@ int IPU1dma() } else { - IPU_LOG("Here"); cpuRegs.eCycle[4] = 0x9999;//IPU_INT_TO(2048); } @@ -1601,7 +1563,6 @@ __forceinline void dmaIPU1() // toIPU IPU1Status.DMAMode = DMA_MODE_CHAIN; IPU1dma(); - //if (ipuRegs->ctrl.BUSY) IPUWorker(); } else //Normal Mode { @@ -1623,7 +1584,6 @@ __forceinline void dmaIPU1() // toIPU IPU1Status.DMAFinished = true; IPU1Status.DMAMode = DMA_MODE_NORMAL; IPU1dma(); - //if (ipuRegs->ctrl.BUSY) IPUWorker(); } } } diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index ab831329aa..bb23f05e33 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -17,7 +17,6 @@ #define __IPU_H__ #include "mpeg2lib/Mpeg.h" -#include "coroutine.h" #include "IPU_Fifo.h" #ifdef _MSC_VER @@ -327,7 +326,7 @@ struct IPUregisters { struct tIPU_cmd { int index; - int pos[2]; + int pos[6]; int current; void clear() { @@ -342,12 +341,13 @@ struct tIPU_cmd } }; -//extern tIPU_cmd ipu_cmd; +extern tIPU_cmd ipu_cmd; extern tIPU_BP g_BP; extern int coded_block_pattern; extern int g_nIPU0Data; // or 0x80000000 whenever transferring extern u8* g_pIPU0Pointer; - +extern IPUStatus IPU1Status; +extern tIPU_DMA g_nDMATransfer; // The IPU can only do one task at once and never uses other buffers so these // should be made available to functions in other modules to save registers. extern __aligned16 macroblock_rgb32 rgb32; @@ -376,10 +376,11 @@ extern int IPU0dma(); extern int IPU1dma(); extern u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size); +extern u8 __fastcall getBits128(u8 *address, u32 advance); +extern u8 __fastcall getBits64(u8 *address, u32 advance); extern u8 __fastcall getBits32(u8 *address, u32 advance); extern u8 __fastcall getBits16(u8 *address, u32 advance); extern u8 __fastcall getBits8(u8 *address, u32 advance); -extern int __fastcall getBits(u8 *address, u32 size, u32 advance); #endif diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp index 66b1af502c..2435903aa3 100644 --- a/pcsx2/IPU/IPU_Fifo.cpp +++ b/pcsx2/IPU/IPU_Fifo.cpp @@ -13,7 +13,6 @@ * If not, see . */ - #include "PrecompiledHeader.h" #include "Common.h" #include "IPU_Fifo.h" @@ -106,20 +105,18 @@ int IPU_Fifo_Output::write(const u32 *value, int size) ipuRegs->ctrl.OFC += firsttrans; IPU0dma(); - //Console.WriteLn("Written %d qwords, %d", firsttrans,ipuRegs->ctrl.OFC); return firsttrans; } int IPU_Fifo_Input::read(void *value) { - // wait until enough data - if (g_BP.IFC < 8) + // wait until enough data to ensure proper streaming. + if (g_BP.IFC < 4) { // IPU FIFO is empty and DMA is waiting so lets tell the DMA we are ready to put data in the FIFO if(cpuRegs.eCycle[4] == 0x9999) { - //DevCon.Warning("Setting ECycle"); CPU_INT( DMAC_TO_IPU, 4 ); } diff --git a/pcsx2/IPU/acoroutine.S b/pcsx2/IPU/acoroutine.S deleted file mode 100644 index 2c28a2c248..0000000000 --- a/pcsx2/IPU/acoroutine.S +++ /dev/null @@ -1,78 +0,0 @@ -.intel_syntax noprefix - -.extern g_pCurrentRoutine - -.globl so_call -so_call: - mov eax, dword ptr [esp+4] - test dword ptr [eax+24], 1 - jnz RestoreRegs - mov [eax+8], ebx - mov [eax+12], esi - mov [eax+16], edi - mov [eax+20], ebp - mov dword ptr [eax+24], 1 - jmp CallFn -RestoreRegs: - // have to load and save at the same time - mov ecx, [eax+8] - mov edx, [eax+12] - mov [eax+8], ebx - mov [eax+12], esi - mov ebx, ecx - mov esi, edx - mov ecx, [eax+16] - mov edx, [eax+20] - mov [eax+16], edi - mov [eax+20], ebp - mov edi, ecx - mov ebp, edx - -CallFn: - mov [g_pCurrentRoutine], eax - mov ecx, esp - mov esp, [eax+4] - mov [eax+4], ecx - - jmp dword ptr [eax] - -.globl so_resume -so_resume: - mov eax, [g_pCurrentRoutine] - mov ecx, [eax+8] - mov edx, [eax+12] - mov [eax+8], ebx - mov [eax+12], esi - mov ebx, ecx - mov esi, edx - mov ecx, [eax+16] - mov edx, [eax+20] - mov [eax+16], edi - mov [eax+20], ebp - mov edi, ecx - mov ebp, edx - - // put the return address in pcalladdr - mov ecx, [esp] - mov [eax], ecx - add esp, 4 // remove the return address - - // swap stack pointers - mov ecx, [eax+4] - mov [eax+4], esp - mov esp, ecx - ret - -.globl so_exit -so_exit: - mov eax, [g_pCurrentRoutine] - mov esp, [eax+4] - mov ebx, [eax+8] - mov esi, [eax+12] - mov edi, [eax+16] - mov ebp, [eax+20] - ret - -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif diff --git a/pcsx2/IPU/acoroutine.asm b/pcsx2/IPU/acoroutine.asm deleted file mode 100644 index d81a5f12d2..0000000000 --- a/pcsx2/IPU/acoroutine.asm +++ /dev/null @@ -1,140 +0,0 @@ -; Pcsx2 - Pc Ps2 Emulator -; Copyright (C) 2002-2008 Pcsx2 Team -; -; This program is free software; you can redistribute it and/or modify -; it under the terms of the GNU General Public License as published by -; the Free Software Foundation; either version 2 of the License, or -; (at your option) any later version. - -; This program is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; GNU General Public License for more details. -; -; You should have received a copy of the GNU General Public License -; along with this program; if not, write to the Free Software -; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - -;; x86-64 coroutine fucntions -extern g_pCurrentRoutine:ptr - -.code - -so_call proc public - test dword ptr [rcx+88], 1 - jnz so_call_RestoreRegs - mov [rcx+24], rbp - mov [rcx+16], rbx - mov [rcx+32], r12 - mov [rcx+40], r13 - mov [rcx+48], r14 - mov [rcx+56], r15 - mov [rcx+64], rsi - mov [rcx+72], rdi - mov dword ptr [rcx+88], 1 - jmp so_call_CallFn -so_call_RestoreRegs: - ;; have to load and save at the same time - ;; rbp, rbx, r12 - mov rax, [rcx+24] - mov r8, [rcx+16] - mov rdx, [rcx+32] - mov [rcx+24], rbp - mov [rcx+16], rbx - mov [rcx+32], r12 - mov rbp, rax - mov rbx, r8 - mov r12, rdx - ;; r13, r14, r15 - mov rax, [rcx+40] - mov r8, [rcx+48] - mov rdx, [rcx+56] - mov [rcx+40], r13 - mov [rcx+48], r14 - mov [rcx+56], r15 - mov r13, rax - mov r14, r8 - mov r15, rdx - - ;; rsi, rdi - mov rax, [rcx+64] - mov rdx, [rcx+72] - mov [rcx+64], rsi - mov [rcx+72], rdi - mov rsi, rax - mov rdi, rdx - -so_call_CallFn: - mov [g_pCurrentRoutine], rcx - - ;; swap the stack - mov rax, [rcx+8] - mov [rcx+8], rsp - mov rsp, rax - mov rax, [rcx+0] - mov rcx, [rcx+80] - - jmp rax - -so_call endp - -; so_resume -so_resume proc public - ;; rbp, rbx, r12 - mov rcx, [g_pCurrentRoutine] - mov rax, [rcx+24] - mov r8, [rcx+16] - mov rdx, [rcx+32] - mov [rcx+24], rbp - mov [rcx+16], rbx - mov [rcx+32], r12 - mov rbp, rax - mov rbx, r8 - mov r12, rdx - ;; r13, r14, r15 - mov rax, [rcx+40] - mov r8, [rcx+48] - mov rdx, [rcx+56] - mov [rcx+40], r13 - mov [rcx+48], r14 - mov [rcx+56], r15 - mov r13, rax - mov r14, r8 - mov r15, rdx - ;; rsi, rdi - mov rax, [rcx+64] - mov rdx, [rcx+72] - mov [rcx+64], rsi - mov [rcx+72], rdi - mov rsi, rax - mov rdi, rdx - - ;; put the return address in pcalladdr - mov rax, [rsp] - mov [rcx], rax - add rsp, 8 ;; remove the return address - - ;; swap stack pointers - mov rax, [rcx+8] - mov [rcx+8], rsp - mov rsp, rax - - ret - -so_resume endp - -so_exit proc public - mov rcx, [g_pCurrentRoutine] - mov rsp, [rcx+8] - mov rbp, [rcx+24] - mov rbx, [rcx+16] - mov r12, [rcx+32] - mov r13, [rcx+40] - mov r14, [rcx+48] - mov r15, [rcx+56] - mov rsi, [rcx+64] - mov rdi, [rcx+72] - ret -so_exit endp - -end \ No newline at end of file diff --git a/pcsx2/IPU/coroutine.cpp b/pcsx2/IPU/coroutine.cpp deleted file mode 100644 index b2992091b1..0000000000 --- a/pcsx2/IPU/coroutine.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - - -#include "PrecompiledHeader.h" - -#include "coroutine.h" - -struct coroutine { - void* pcalladdr; - void *pcurstack; - - uptr storeebx, storeesi, storeedi, storeebp; - - s32 restore; // if nonzero, restore the registers - s32 alloc; - //struct s_coroutine *caller; - //struct s_coroutine *restarget; - -}; - -#define CO_STK_ALIGN 256 -#define CO_STK_COROSIZE ((sizeof(coroutine) + CO_STK_ALIGN - 1) & ~(CO_STK_ALIGN - 1)) -#define CO_MIN_SIZE (4 * 1024) - -coroutine* g_pCurrentRoutine; - -coroutine_t so_create(void (*func)(void *), void *data, void *stack, int size) -{ - void* endstack; - int alloc = 0; // r = CO_STK_COROSIZE; - coroutine *co; - - if ((size &= ~(sizeof(s32) - 1)) < CO_MIN_SIZE) return NULL; - if (!stack) { - size = (size + sizeof(coroutine) + CO_STK_ALIGN - 1) & ~(CO_STK_ALIGN - 1); - stack = malloc(size); - if (!stack) return NULL; - alloc = size; - } - endstack = (char*)stack + size - 64; - co = (coroutine*)stack; - stack = (char *) stack + CO_STK_COROSIZE; - *(void**)endstack = NULL; - *(void**)((char*)endstack+sizeof(void*)) = data; - co->alloc = alloc; - co->pcalladdr = (void*)func; - co->pcurstack = endstack; - return co; -} - -void so_delete(coroutine_t coro) -{ - coroutine *co = (coroutine *) coro; - pxAssert( co != NULL ); - if (co->alloc) free(co); -} - -// see acoroutines.S and acoroutines.asm for other asm implementations -#if defined(_MSC_VER) - -__declspec(naked) void so_call(coroutine_t coro) -{ - __asm { - mov eax, dword ptr [esp+4] - test dword ptr [eax+24], 1 - jnz RestoreRegs - mov [eax+8], ebx - mov [eax+12], esi - mov [eax+16], edi - mov [eax+20], ebp - mov dword ptr [eax+24], 1 - jmp CallFn -RestoreRegs: - // have to load and save at the same time - mov ecx, [eax+8] - mov edx, [eax+12] - mov [eax+8], ebx - mov [eax+12], esi - mov ebx, ecx - mov esi, edx - mov ecx, [eax+16] - mov edx, [eax+20] - mov [eax+16], edi - mov [eax+20], ebp - mov edi, ecx - mov ebp, edx - -CallFn: - mov [g_pCurrentRoutine], eax - mov ecx, esp - mov esp, [eax+4] - mov [eax+4], ecx - - jmp dword ptr [eax] - } -} - -__declspec(naked) void so_resume(void) -{ - __asm { - mov eax, [g_pCurrentRoutine] - mov ecx, [eax+8] - mov edx, [eax+12] - mov [eax+8], ebx - mov [eax+12], esi - mov ebx, ecx - mov esi, edx - mov ecx, [eax+16] - mov edx, [eax+20] - mov [eax+16], edi - mov [eax+20], ebp - mov edi, ecx - mov ebp, edx - - // put the return address in pcalladdr - mov ecx, [esp] - mov [eax], ecx - add esp, 4 // remove the return address - - // swap stack pointers - mov ecx, [eax+4] - mov [eax+4], esp - mov esp, ecx - ret - } -} - -__declspec(naked) void so_exit(void) -{ - __asm { - mov eax, [g_pCurrentRoutine] - mov esp, [eax+4] - mov ebx, [eax+8] - mov esi, [eax+12] - mov edi, [eax+16] - mov ebp, [eax+20] - ret - } -} -#endif diff --git a/pcsx2/IPU/coroutine.h b/pcsx2/IPU/coroutine.h deleted file mode 100644 index 7d40348450..0000000000 --- a/pcsx2/IPU/coroutine.h +++ /dev/null @@ -1,27 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#ifndef PCSX2_COROUTINE_LIB -#define PCSX2_COROUTINE_LIB - -// low level coroutine library -typedef void *coroutine_t; - -coroutine_t so_create(void (*func)(void *), void *data, void *stack, int size); -void so_delete(coroutine_t coro); - -#include "NakedAsm.h" - -#endif diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp b/pcsx2/IPU/mpeg2lib/Mpeg.cpp index c7c8afca15..7b7a278fa9 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp +++ b/pcsx2/IPU/mpeg2lib/Mpeg.cpp @@ -48,55 +48,51 @@ int non_linear_quantizer_scale [] = back to the 1st slot when 128bits have been read. */ extern void ReorderBitstream(); +const DCTtab * tab; +int mbaCount = 0; -int get_macroblock_modes(decoder_t * const decoder) +int get_macroblock_modes() { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) int macroblock_modes; const MBtab * tab; - switch (decoder->coding_type) + switch (decoder.coding_type) { - case I_TYPE: - macroblock_modes = UBITS(bit_buf, 2); + macroblock_modes = UBITS(2); if (macroblock_modes == 0) return 0; // error tab = MB_I + (macroblock_modes >> 1); - DUMPBITS(bit_buf, bits, tab->len); + DUMPBITS(tab->len); macroblock_modes = tab->modes; - if ((!(decoder->frame_pred_frame_dct)) && - (decoder->picture_structure == FRAME_PICTURE)) + if ((!(decoder.frame_pred_frame_dct)) && + (decoder.picture_structure == FRAME_PICTURE)) { - macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS(bit_buf, bits, 1); + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; } return macroblock_modes; case P_TYPE: - macroblock_modes = UBITS(bit_buf, 6); + macroblock_modes = UBITS(6); if (macroblock_modes == 0) return 0; // error tab = MB_P + (macroblock_modes >> 1); - DUMPBITS(bit_buf, bits, tab->len); + DUMPBITS(tab->len); macroblock_modes = tab->modes; - if (decoder->picture_structure != FRAME_PICTURE) + if (decoder.picture_structure != FRAME_PICTURE) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS(bit_buf, bits, 2); + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; } return macroblock_modes; } - else if (decoder->frame_pred_frame_dct) + else if (decoder.frame_pred_frame_dct) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) macroblock_modes |= MC_FRAME; @@ -107,39 +103,36 @@ int get_macroblock_modes(decoder_t * const decoder) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS(bit_buf, bits, 2); + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; } if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS(bit_buf, bits, 1); + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; } return macroblock_modes; } case B_TYPE: - macroblock_modes = UBITS(bit_buf, 6); + macroblock_modes = UBITS(6); if (macroblock_modes == 0) return 0; // error tab = MB_B + macroblock_modes; - DUMPBITS(bit_buf, bits, tab->len); + DUMPBITS(tab->len); macroblock_modes = tab->modes; - if (decoder->picture_structure != FRAME_PICTURE) + if (decoder.picture_structure != FRAME_PICTURE) { if (!(macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS(bit_buf, bits, 2); + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; } return macroblock_modes; } - else if (decoder->frame_pred_frame_dct) + else if (decoder.frame_pred_frame_dct) { /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ macroblock_modes |= MC_FRAME; @@ -149,968 +142,544 @@ int get_macroblock_modes(decoder_t * const decoder) { if (macroblock_modes & MACROBLOCK_INTRA) goto intra; - macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS(bit_buf, bits, 2); + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { intra: - macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS(bit_buf, bits, 1); + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; } return macroblock_modes; } case D_TYPE: - macroblock_modes = UBITS(bit_buf, 1); + macroblock_modes = GETBITS(1); if (macroblock_modes == 0) return 0; // error - - DUMPBITS(bit_buf, bits, 1); return MACROBLOCK_INTRA; default: return 0; } - -#undef bit_buf -#undef bits -#undef bit_ptr } -static __forceinline int get_quantizer_scale(decoder_t * const decoder) +static __forceinline int get_quantizer_scale() { int quantizer_scale_code; - quantizer_scale_code = UBITS(decoder->bitstream_buf, 5); - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 5); + quantizer_scale_code = GETBITS(5); - if (decoder->q_scale_type) + if (decoder.q_scale_type) return non_linear_quantizer_scale [quantizer_scale_code]; else return quantizer_scale_code << 1; } -static __forceinline int get_coded_block_pattern(decoder_t * const decoder) +static __forceinline int get_coded_block_pattern() { const CBPtab * tab; + u16 code = UBITS(16); - NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr); - - if (decoder->bitstream_buf >= 0x20000000) - tab = CBP_7 + (UBITS(decoder->bitstream_buf, 7) - 16); + if (code >= 0x2000) + tab = CBP_7 + (UBITS(7) - 16); else - tab = CBP_9 + UBITS(decoder->bitstream_buf, 9); + tab = CBP_9 + UBITS(9); - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, tab->len); + DUMPBITS(tab->len); return tab->cbp; } -static __forceinline int get_luma_dc_dct_diff(decoder_t * const decoder) +int __forceinline get_motion_delta(const int f_code) { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) +#define bit_buf (decoder.bitstream_buf) +#define bits (decoder.bitstream_bits) +#define bit_ptr (decoder.bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; + int delta; + int sign; + const MVtab * tab; + u16 code = UBITS(16); - if (bit_buf < 0xf8000000) + if ((code & 0x8000)) { - tab = DC_lum_5 + UBITS(bit_buf, 5); - size = tab->size; - - if (size) - { - DUMPBITS(bit_buf, bits, tab->len); - bits += size; - dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff; - } - else - { - DUMPBITS(bit_buf, bits, 3); - return 0; - } + DUMPBITS(1); + return 0x00010000; + } + else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00)) + { + tab = MV_4 + UBITS(4); + } + else + { + tab = MV_10 + UBITS(10); } - tab = DC_long + (UBITS(bit_buf, 9) - 0x1e0); //0x1e0); + delta = tab->delta + 1; + DUMPBITS(tab->len); + + sign = SBITS(1); + DUMPBITS(1); + return (delta ^ sign) - sign; - size = tab->size; - DUMPBITS(bit_buf, bits, tab->len); - NEEDBITS(bit_buf, bits, bit_ptr); - dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size); - DUMPBITS(bit_buf, bits, size); - return dc_diff; #undef bit_buf #undef bits #undef bit_ptr } -static __forceinline int get_chroma_dc_dct_diff(decoder_t * const decoder) +int __forceinline get_dmv() { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) + const DMVtab * tab; - const DCtab * tab; + tab = DMV_2 + UBITS(2); + DUMPBITS(tab->len); + return tab->dmv; +} + +int get_macroblock_address_increment() +{ + const MBAtab *mba; + + u16 code = UBITS(16); + + if (code >= 4096) + mba = MBA_5 + (UBITS(5) - 2); + else if (code >= 768) + mba = MBA_11 + (UBITS(11) - 24); + else switch (UBITS(11)) + { + + case 8: /* macroblock_escape */ + DUMPBITS(11); + return 0x23; + + case 15: /* macroblock_stuffing (MPEG1 only) */ + if (decoder.mpeg1) + { + DUMPBITS(11); + return 0x22; + } + + default: + return 0;//error + } + + DUMPBITS(mba->len); + + return mba->mba + 1; +} + +static __forceinline int get_luma_dc_dct_diff() +{ int size; int dc_diff; + u16 code = UBITS(5); - if (bit_buf < 0xf8000000) + if (code < 31) { - tab = DC_chrom_5 + UBITS(bit_buf, 5); - size = tab->size; + size = DClumtab0[code].size; + DUMPBITS(DClumtab0[code].len); - if (size) - { - DUMPBITS(bit_buf, bits, tab->len); - bits += size; - dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff; - } - else - { - DUMPBITS(bit_buf, bits, 2); - return 0; - } + // 5 bits max + } + else + { + code = UBITS(9) - 0x1f0; + size = DClumtab1[code].size; + DUMPBITS(DClumtab1[code].len); + + // 9 bits max + } + + if (size==0) + dc_diff = 0; + else + { + dc_diff = GETBITS(size); + + // 6 for tab0 and 11 for tab1 + if ((dc_diff & (1<<(size-1)))==0) + dc_diff-= (1<size; - DUMPBITS(bit_buf, bits, tab->len + 1); - NEEDBITS(bit_buf, bits, bit_ptr); - dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size); - DUMPBITS(bit_buf, bits, size); return dc_diff; -#undef bit_buf -#undef bits -#undef bit_ptr +} + +static __forceinline int get_chroma_dc_dct_diff() +{ + int size; + int dc_diff; + u16 code = UBITS(5); + + if (code<31) + { + size = DCchromtab0[code].size; + DUMPBITS(DCchromtab0[code].len); + } + else + { + code = UBITS(10) - 0x3e0; + size = DCchromtab1[code].size; + DUMPBITS(DCchromtab1[code].len); + } + + if (size==0) + dc_diff = 0; + else + { + dc_diff = GETBITS(size); + + if ((dc_diff & (1<<(size-1)))==0) + { + dc_diff-= (1< 4095)) \ - val = SBITS (val, 1) ^ 2047; \ + val = (((s32)val) >> 31) ^ 2047; \ } while (0) -static __forceinline void get_intra_block_B14(decoder_t * const decoder) +static __forceinline bool get_intra_block() { int i; int j; int val; - const u8 * scan = decoder->scan; - const u8 * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; - int mismatch; - const DCTtab * tab; - u32 bit_buf; - u8 * bit_ptr; - int bits; - s16 * dest; + const u8 * scan = decoder.scan; + const u8 * quant_matrix = decoder.intra_quantizer_matrix; + int quantizer_scale = decoder.quantizer_scale; + s16 * dest = decoder.DCTblock; + u16 code; - dest = decoder->DCTblock; - i = 0; - mismatch = ~dest[0]; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS(bit_buf, bits, bit_ptr); - - while (1) - { - if (bit_buf >= 0x28000000) + /* decode AC coefficients */ + for (i=1 + ipu_cmd.pos[4]; ; i++) + { + switch (ipu_cmd.pos[5]) + { + case 0: + if (!GETWORD()) { - tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5); - i += tab->run; - if (i >= 64) break; /* end of block */ - -normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1); - - SATURATE(val); - dest[j] = val; - mismatch ^= val; - bit_buf <<= 1; - NEEDBITS(bit_buf, bits, bit_ptr); - continue; + ipu_cmd.pos[4] = i - 1; + return false; } - else if (bit_buf >= 0x04000000) + + code = UBITS(16); + + if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1)) { - tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4); - i += tab->run; - - if (i < 64) goto normal_code; - - /* escape code */ - - i += UBITS(bit_buf << 6, 6) - 64; - - if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (SBITS(bit_buf, 12) * quantizer_scale * quant_matrix[i]) / 16; - - SATURATE(val); - dest[j] = val; - mismatch ^= val; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - continue; - + tab = &DCTtabnext[(code >> 12) - 4]; } - else if (bit_buf >= 0x02000000) + else if (code >= 1024) { - tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8); - i += tab->run; - - if (i < 64) goto normal_code; + if (decoder.intra_vlc_format && !decoder.mpeg1) + { + tab = &DCTtab0a[(code >> 8) - 4]; + } + else + { + tab = &DCTtab0[(code >> 8) - 4]; + } } - else if (bit_buf >= 0x00800000) + else if (code >= 512) { - tab = DCT_13 + (UBITS(bit_buf, 13) - 16); - i += tab->run; - - if (i < 64) goto normal_code; + if (decoder.intra_vlc_format && !decoder.mpeg1) + { + tab = &DCTtab1a[(code >> 6) - 8]; + } + else + { + tab = &DCTtab1[(code >> 6) - 8]; + } } - else if (bit_buf >= 0x00200000) + else if (code >= 256) { - tab = DCT_15 + (UBITS(bit_buf, 15) - 16); - i += tab->run; - - if (i < 64) goto normal_code; + tab = &DCTtab2[(code >> 4) - 16]; + } + else if (code >= 128) + { + tab = &DCTtab3[(code >> 3) - 16]; + } + else if (code >= 64) + { + tab = &DCTtab4[(code >> 2) - 16]; + } + else if (code >= 32) + { + tab = &DCTtab5[(code >> 1) - 16]; + } + else if (code >= 16) + { + tab = &DCTtab6[code - 16]; } else { - tab = DCT_16 + UBITS(bit_buf, 16); - bit_buf <<= 16; - GETWORD(&bit_buf, bits + 16); - i += tab->run; - - if (i < 64) goto normal_code; + ipu_cmd.pos[4] = 0; + return true; } - break; /* illegal, check needed to avoid buffer overflow */ - } + DUMPBITS(tab->len); - dest[63] ^= mismatch & 1; + if (tab->run==64) /* end_of_block */ + { + ipu_cmd.pos[4] = 0; + return true; + } + + i+= tab->run == 65 ? GETBITS(6) : tab->run; + if (i >= 64) + { + ipu_cmd.pos[4] = 0; + return true; + } + case 1: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i - 1; + ipu_cmd.pos[5] = 1; + return false; + } - if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1; + j = scan[i]; - DUMPBITS(bit_buf, bits, tab->len); /* dump end of block code */ + if (tab->run==65) /* escape */ + { + if(!decoder.mpeg1) + { + val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = (val * quantizer_scale * quant_matrix[i]) >> 4; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + if(decoder.mpeg1) + { + /* oddification */ + val = (val - 1) | 1; + } + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS(1)) - SBITS(1); + DUMPBITS(1); + } + + SATURATE(val); + dest[j] = val; + ipu_cmd.pos[5] = 0; + } + } + + ipu_cmd.pos[4] = 0; + return true; } -static __forceinline void get_intra_block_B15(decoder_t * const decoder) +static __forceinline bool get_non_intra_block(int * last) { int i; int j; int val; - const u8 * scan = decoder->scan; - const u8 * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; - int mismatch; - const DCTtab * tab; - u32 bit_buf; - u8 * bit_ptr; - int bits; - s16 * dest; + const u8 * scan = decoder.scan; + const u8 * quant_matrix = decoder.non_intra_quantizer_matrix; + int quantizer_scale = decoder.quantizer_scale; + s16 * dest = decoder.DCTblock; + u16 code; - dest = decoder->DCTblock; - i = 0; - mismatch = ~dest[0]; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS(bit_buf, bits, bit_ptr); - - while (1) - { - if (bit_buf >= 0x04000000) + /* decode AC coefficients */ + for (i= ipu_cmd.pos[4] ; ; i++) + { + switch (ipu_cmd.pos[5]) { - tab = DCT_B15_8 + (UBITS(bit_buf, 8) - 4); - i += tab->run; - - if (i < 64) + case 0: + if (!GETWORD()) { -normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + ipu_cmd.pos[4] = i; + return false; + } - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1); + code = UBITS(16); - SATURATE(val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS(bit_buf, bits, bit_ptr); - - continue; + if (code >= 16384) + { + if (i==0) + { + tab = &DCTtabfirst[(code >> 12) - 4]; + } + else + { + tab = &DCTtabnext[(code >> 12)- 4]; + } + } + else if (code >= 1024) + { + tab = &DCTtab0[(code >> 8) - 4]; + } + else if (code >= 512) + { + tab = &DCTtab1[(code >> 6) - 8]; + } + else if (code >= 256) + { + tab = &DCTtab2[(code >> 4) - 16]; + } + else if (code >= 128) + { + tab = &DCTtab3[(code >> 3) - 16]; + } + else if (code >= 64) + { + tab = &DCTtab4[(code >> 2) - 16]; + } + else if (code >= 32) + { + tab = &DCTtab5[(code >> 1) - 16]; + } + else if (code >= 16) + { + tab = &DCTtab6[code - 16]; } else { - /* end of block. I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ - //if (i >= 128) break; /* end of block */ - /* escape code */ - - i += UBITS(bit_buf << 6, 6) - 64; - - if (i >= 64) break; /* illegal, check against buffer overflow */ - - j = scan[i]; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (SBITS(bit_buf, 12) * quantizer_scale * quant_matrix[i]) / 16; - - SATURATE(val); - dest[j] = val; - mismatch ^= val; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - continue; + ipu_cmd.pos[4] = 0; + return true; } - } - else if (bit_buf >= 0x02000000) - { - tab = DCT_B15_10 + (UBITS(bit_buf, 10) - 8); - i += tab->run; - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00800000) - { - tab = DCT_13 + (UBITS(bit_buf, 13) - 16); - i += tab->run; + DUMPBITS(tab->len); - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00200000) - { - tab = DCT_15 + (UBITS(bit_buf, 15) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else - { - tab = DCT_16 + UBITS(bit_buf, 16); - bit_buf <<= 16; - GETWORD(&bit_buf, bits + 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - - break; /* illegal, check needed to avoid buffer overflow */ - } - - dest[63] ^= mismatch & 1; - - if ((bit_buf >> 28) != 0x6) - ipuRegs->ctrl.ECD = 1; - - DUMPBITS(bit_buf, bits, tab->len); /* dump end of block code */ - - decoder->bitstream_buf = bit_buf; - - decoder->bitstream_bits = bits; -} - -static __forceinline int get_non_intra_block(decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int i; - int j; - int val; - const u8 * scan = decoder->scan; - const u8 * quant_matrix = decoder->non_intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; - int mismatch; - const DCTtab * tab; - s16 * dest; - - i = -1; - mismatch = -1; - dest = decoder->DCTblock; - NEEDBITS(bit_buf, bits, bit_ptr); - - if (bit_buf >= 0x28000000) - { - tab = DCT_B14DC_5 + (UBITS(bit_buf, 5) - 5); - goto entry_1; - } - else - goto entry_2; - - while (1) - { - if (bit_buf >= 0x28000000) - { - tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5); -entry_1: - i += tab->run; - - if (i >= 64) break; /* end of block */ -normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1); - - SATURATE(val); - dest[j] = val; - mismatch ^= val; - bit_buf <<= 1; - NEEDBITS(bit_buf, bits, bit_ptr); - continue; - } -entry_2: - - if (bit_buf >= 0x04000000) - { - tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4); - i += tab->run; - - if (i < 64) goto normal_code; - - /* escape code */ - - i += UBITS(bit_buf << 6, 6) - 64; - - if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - val = 2 * (SBITS(bit_buf, 12) + SBITS(bit_buf, 1)) + 1; - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (val * quantizer_scale * quant_matrix[i]) / 32; - - SATURATE(val); - dest[j] = val; - mismatch ^= val; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - continue; - } - else if (bit_buf >= 0x02000000) - { - tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8); - i += tab->run; - - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00800000) - { - tab = DCT_13 + (UBITS(bit_buf, 13) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00200000) - { - tab = DCT_15 + (UBITS(bit_buf, 15) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else - { - tab = DCT_16 + UBITS(bit_buf, 16); - bit_buf <<= 16; - GETWORD(&bit_buf, bits + 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - - dest[63] ^= mismatch & 1; - - if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1; - - DUMPBITS(bit_buf, bits, tab->len); /* dump end of block code */ - - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - return i; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static __forceinline void get_mpeg1_intra_block(decoder_t * const decoder) -{ - int i; - int j; - int val; - const u8 * scan = decoder->scan; - const u8 * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; - const DCTtab * tab; - u32 bit_buf; - int bits; - u8 * bit_ptr; - s16 * dest; - - i = 0; - dest = decoder->DCTblock; - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - NEEDBITS(bit_buf, bits, bit_ptr); - - while (1) - { - if (bit_buf >= 0x28000000) - { - tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5); - i += tab->run; - - if (i >= 64) break; /* end of block */ - -normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1); - - SATURATE(val); - dest[j] = val; - bit_buf <<= 1; - NEEDBITS(bit_buf, bits, bit_ptr); - continue; - - } - else if (bit_buf >= 0x04000000) - { - tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4); - i += tab->run; - - if (i < 64) goto normal_code; - - /* escape code */ - - i += UBITS(bit_buf << 6, 6) - 64; - - if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - val = SBITS(bit_buf, 8); - - if (!(val & 0x7f)) + if (tab->run==64) /* end_of_block */ { - DUMPBITS(bit_buf, bits, 8); - val = UBITS(bit_buf, 8) + 2 * val; + *last = i; + ipu_cmd.pos[4] = 0; + return true; } - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (val * quantizer_scale * quant_matrix[i]) >> 4; - - /* oddification */ - val = (val + ~SBITS(val, 1)) | 1; - - SATURATE(val); - dest[j] = val; - DUMPBITS(bit_buf, bits, 8); - NEEDBITS(bit_buf, bits, bit_ptr); - continue; - } - else if (bit_buf >= 0x02000000) - { - tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8); - i += tab->run; - - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00800000) - { - tab = DCT_13 + (UBITS(bit_buf, 13) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00200000) - { - tab = DCT_15 + (UBITS(bit_buf, 15) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else - { - tab = DCT_16 + UBITS(bit_buf, 16); - bit_buf <<= 16; - GETWORD(&bit_buf, bits + 16); - i += tab->run; - goto normal_code; - } - - break; /* illegal, check needed to avoid buffer overflow */ - } - - if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1; - - DUMPBITS(bit_buf, bits, 2); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; -} - -static __forceinline int get_mpeg1_non_intra_block(decoder_t * const decoder) -{ - int i; - int j; - int val; - const u8 * scan = decoder->scan; - const u8 * quant_matrix = decoder->non_intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; - const DCTtab * tab; - u32 bit_buf; - int bits; - u8 * bit_ptr; - s16 * dest; - - i = -1; - dest = decoder->DCTblock; - - bit_buf = decoder->bitstream_buf; - bits = decoder->bitstream_bits; - bit_ptr = decoder->bitstream_ptr; - - NEEDBITS(bit_buf, bits, bit_ptr); - - if (bit_buf >= 0x28000000) - { - tab = DCT_B14DC_5 + (UBITS(bit_buf, 5) - 5); - goto entry_1; - } - else - goto entry_2; - - while (1) - { - if (bit_buf >= 0x28000000) - { - tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5); -entry_1: - i += tab->run; - - if (i >= 64) break; /* end of block */ - -normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1); - - SATURATE(val); - dest[j] = val; - bit_buf <<= 1; - NEEDBITS(bit_buf, bits, bit_ptr); - continue; - } -entry_2: - if (bit_buf >= 0x04000000) - { - tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4); - i += tab->run; - - if (i < 64) goto normal_code; - - /* escape code */ - - i += UBITS(bit_buf << 6, 6) - 64; - - if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - DUMPBITS(bit_buf, bits, 12); - NEEDBITS(bit_buf, bits, bit_ptr); - val = SBITS(bit_buf, 8); - - if (!(val & 0x7f)) + i += (tab->run == 65) ? GETBITS(6) : tab->run; + if (i >= 64) { - DUMPBITS(bit_buf, bits, 8); - val = UBITS(bit_buf, 8) + 2 * val; + *last = i; + ipu_cmd.pos[4] = 0; + return true; } - val = 2 * (val + SBITS(val, 1)) + 1; + case 1: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i; + ipu_cmd.pos[5] = 1; + return false; + } - /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */ - val = (val * quantizer_scale * quant_matrix[i]) / 32; + j = scan[i]; - /* oddification */ - val = (val + ~SBITS(val, 1)) | 1; + if (tab->run==65) /* escape */ + { + if (!decoder.mpeg1) + { + val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); + + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; + val = (val ^ SBITS(1)) - SBITS(1); + DUMPBITS(1); + } SATURATE(val); dest[j] = val; - DUMPBITS(bit_buf, bits, 8); - NEEDBITS(bit_buf, bits, bit_ptr); - continue; + ipu_cmd.pos[5] = 0; } - else if (bit_buf >= 0x02000000) - { - tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8); - i += tab->run; - - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00800000) - { - tab = DCT_13 + (UBITS(bit_buf, 13) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else if (bit_buf >= 0x00200000) - { - tab = DCT_15 + (UBITS(bit_buf, 15) - 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - else - { - tab = DCT_16 + UBITS(bit_buf, 16); - bit_buf <<= 16; - GETWORD(&bit_buf, bits + 16); - i += tab->run; - - if (i < 64) goto normal_code; - } - - break; /* illegal, check needed to avoid buffer overflow */ } - if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1; - - DUMPBITS(bit_buf, bits, 2); /* dump end of block code */ - decoder->bitstream_buf = bit_buf; - decoder->bitstream_bits = bits; - return i; + ipu_cmd.pos[4] = 0; + return true; } -static void __fastcall slice_intra_DCT(decoder_t * const decoder, const int cc, - u8 * const dest, const int stride) +static bool __fastcall slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) { - NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr); - /* Get the intra DC coefficient and inverse quantize it */ - - if (cc == 0) - decoder->dc_dct_pred[0] += get_luma_dc_dct_diff(decoder); - else - decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff(decoder); - - decoder->DCTblock[0] = decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision); - - if (decoder->mpeg1) + if (!skip || ipu_cmd.pos[3]) { - get_mpeg1_intra_block(decoder); - } - else if (decoder->intra_vlc_format) - { - get_intra_block_B15(decoder); - } - else - { - get_intra_block_B14(decoder); + ipu_cmd.pos[3] = 0; + if (!GETWORD()) + { + ipu_cmd.pos[3] = 1; + return false; + } + + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + decoder.dc_dct_pred[0] += get_luma_dc_dct_diff(); + else + decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff(); + + decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision); } - mpeg2_idct_copy(decoder->DCTblock, dest, stride); + if (!get_intra_block()) + { + return false; + } + + mpeg2_idct_copy(decoder.DCTblock, dest, stride); + + return true; } -/* JayteeMaster: changed dest to 16 bit signed */ -static void __fastcall slice_non_intra_DCT(decoder_t * const decoder, - /*u8*/s16 * const dest, const int stride) +static bool __fastcall slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) { int last; - memzero(decoder->DCTblock); - if (decoder->mpeg1) - last = get_mpeg1_non_intra_block(decoder); - else - last = get_non_intra_block(decoder); - - mpeg2_idct_add(last, decoder->DCTblock, dest, stride); -} - -#if defined(_MSC_VER) -#pragma pack(1) -#endif - -struct TGA_HEADER -{ - u8 identsize; // size of ID field that follows 18 u8 header (0 usually) - u8 colourmaptype; // type of colour map 0=none, 1=has palette - u8 imagetype; // type of image 0=none,1=indexed,2=rgb,3=grey,+8=rle packed - - s16 colourmapstart; // first colour map entry in palette - s16 colourmaplength; // number of colours in palette - u8 colourmapbits; // number of bits per palette entry 15,16,24,32 - - s16 xstart; // image x origin - s16 ystart; // image y origin - s16 width; // image width in pixels - s16 height; // image height in pixels - u8 bits; // image bits per pixel 8,16,24,32 - u8 descriptor; // image descriptor bits (vh flip bits) - - // pixel data follows header -} __packed; - -#if defined(_MSC_VER) -# pragma pack() -#endif - -void SaveTGA(const char* filename, int width, int height, void* pdata) -{ - TGA_HEADER hdr; - FILE* f = fopen(filename, "wb"); - - if (f == NULL) return; - - assert(sizeof(TGA_HEADER) == 18 && sizeof(hdr) == 18); - - memzero(hdr); - hdr.imagetype = 2; - hdr.bits = 32; - hdr.width = width; - hdr.height = height; - hdr.descriptor |= 8 | (1 << 5); // 8bit alpha, flip vertical - fwrite(&hdr, sizeof(hdr), 1, f); - fwrite(pdata, width*height*4, 1, f); - fclose(f); -} - -static int s_index = 0; //, s_frame = 0; - -void SaveRGB32(u8* ptr) -{ - char filename[255]; - sprintf(filename, "frames/frame%.4d.tga", s_index++); - SaveTGA(filename, 16, 16, ptr); -} - -void waitForSCD() -{ - u8 bit8 = 1; - - while (!getBits8((u8*)&bit8, 0)) + if (!skip) { - so_resume(); + memzero(decoder.DCTblock); } - if (bit8 == 0) + if (!get_non_intra_block(&last)) { - if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7); - - ipuRegs->ctrl.SCD = 1; + return false; } - while (!getBits32((u8*)&ipuRegs->top, 0)) - { - so_resume(); - } + mpeg2_idct_add(last, decoder.DCTblock, dest, stride); - BigEndian(ipuRegs->top, ipuRegs->top); - - /*if(ipuRegs->ctrl.SCD) - { - switch(ipuRegs->top & 0xFFFFFFF0) - { - case 0x100: - case 0x1A0: - break; - case 0x1B0: - ipuRegs->ctrl.SCD = 0; - if(ipuRegs->top == 0x1b4) ipuRegs->ctrl.ECD = 1; - //else - //{ - // do - // { - // while(!getBits32((u8*)&ipuRegs->top, 1)) - // { - // so_resume(); - // } - - // BigEndian(ipuRegs->top, ipuRegs->top); - // } - // while((ipuRegs->top & 0xfffffff0) != 0x100); - //} - break; - default: - ipuRegs->ctrl.SCD = 0; - break; - } - }*/ + return true; } -void __forceinline finishmpeg2sliceIDEC(decoder_t* &decoder) +void __forceinline finishmpeg2sliceIDEC() { ipuRegs->ctrl.SCD = 0; - coded_block_pattern = decoder->coded_block_pattern; + coded_block_pattern = decoder.coded_block_pattern; - g_BP.BP += decoder->bitstream_bits - 16; + g_BP.BP += decoder.bitstream_bits - 16; if ((int)g_BP.BP < 0) { @@ -1122,103 +691,133 @@ void __forceinline finishmpeg2sliceIDEC(decoder_t* &decoder) } FillInternalBuffer(&g_BP.BP, 1, 0); - - waitForSCD(); } -void mpeg2sliceIDEC(void* pdone) +bool mpeg2sliceIDEC() { u32 read; + u16 code; + u8 bit8; - bool resumed = false; - decoder_t *decoder = &g_decoder; - - *(int*)pdone = 0; - bitstream_init(decoder); - - decoder->dc_dct_pred[0] = - decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; - - decoder->mbc = 0; - ipuRegs->ctrl.ECD = 0; - - if (UBITS(decoder->bitstream_buf, 2) == 0) - { - ipuRegs->ctrl.SCD = 0; - } - else + switch (ipu_cmd.pos[0]) { + case 0: + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + + decoder.mbc = 0; + ipuRegs->top = 0; + ipuRegs->ctrl.ECD = 0; + + case 1: + ipu_cmd.pos[0] = 1; + if (!bitstream_init()) + { + return false; + } + + case 2: + ipu_cmd.pos[0] = 2; while (1) { int DCT_offset, DCT_stride; - int mba_inc; const MBAtab * mba; - NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr); - decoder->macroblock_modes = get_macroblock_modes(decoder); - - /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ - - if (decoder->macroblock_modes & MACROBLOCK_QUANT) //only IDEC + switch (ipu_cmd.pos[1]) { - decoder->quantizer_scale = get_quantizer_scale(decoder); - } + case 0: + decoder.macroblock_modes = get_macroblock_modes(); - if (decoder->macroblock_modes & DCT_TYPE_INTERLACED) - { - DCT_offset = decoder->stride; - DCT_stride = decoder->stride * 2; - } - else - { - DCT_offset = decoder->stride * 8; - DCT_stride = decoder->stride; - } - - if (decoder->macroblock_modes & MACROBLOCK_INTRA) - { - decoder->coded_block_pattern = 0x3F;//all 6 blocks - //ipuRegs->ctrl.CBP = 0x3f; - - memzero(*decoder->mb8); - memzero(*decoder->rgb32); - - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y, DCT_stride); - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride); - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset, DCT_stride); - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset + 8, DCT_stride); - slice_intra_DCT(decoder, 1, (u8*)decoder->mb8->Cb, decoder->stride >> 1); - slice_intra_DCT(decoder, 2, (u8*)decoder->mb8->Cr, decoder->stride >> 1); - - // Send The MacroBlock via DmaIpuFrom - - if (decoder->ofm == 0) + if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC { - ipu_csc(decoder->mb8, decoder->rgb32, decoder->sgn); + decoder.quantizer_scale = get_quantizer_scale(); + } - g_nIPU0Data = 64; - g_pIPU0Pointer = (u8*)decoder->rgb32; - //if ( s_frame >= 39 ) SaveRGB32(g_pIPU0Pointer); + decoder.coded_block_pattern = 0x3F;//all 6 blocks + memzero(*decoder.mb8); + memzero(*decoder.rgb32); + + case 1: + ipu_cmd.pos[1] = 1; + + if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) + { + DCT_offset = decoder.stride; + DCT_stride = decoder.stride * 2; } else { - ipu_csc(decoder->mb8, decoder->rgb32, decoder->sgn); - ipu_dither(decoder->rgb32, decoder->rgb16, decoder->dte); - - g_nIPU0Data = 32; - g_pIPU0Pointer = (u8*)decoder->rgb16; - //if ( s_frame >= 39 ) SaveRGB32(g_pIPU0Pointer); + DCT_offset = decoder.stride * 8; + DCT_stride = decoder.stride; } + switch (ipu_cmd.pos[2]) + { + case 0: + case 1: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[2] == 1)) + { + ipu_cmd.pos[2] = 1; + return false; + } + case 2: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[2] == 2)) + { + ipu_cmd.pos[2] = 2; + return false; + } + case 3: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3)) + { + ipu_cmd.pos[2] = 3; + return false; + } + case 4: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4)) + { + ipu_cmd.pos[2] = 4; + return false; + } + case 5: + if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[2] == 5)) + { + ipu_cmd.pos[2] = 5; + return false; + } + case 6: + if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[2] == 6)) + { + ipu_cmd.pos[2] = 6; + return false; + } + } + + // Send The MacroBlock via DmaIpuFrom + ipu_csc(decoder.mb8, decoder.rgb32, decoder.sgn); + + if (decoder.ofm == 0) + { + g_nIPU0Data = 64; + g_pIPU0Pointer = (u8*)decoder.rgb32; + } + else + { + ipu_dither(decoder.rgb32, decoder.rgb16, decoder.dte); + + g_nIPU0Data = 32; + g_pIPU0Pointer = (u8*)decoder.rgb16; + } + + case 2: while (g_nIPU0Data > 0) { read = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data); if (read == 0) { - so_resume(); - resumed = true; + ipu_cmd.pos[1] = 2; + return false; } else { @@ -1228,289 +827,322 @@ void mpeg2sliceIDEC(void* pdone) } } - decoder->mbc++; - } - - NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr); - mba_inc = 0; - - while (1) - { - if (decoder->bitstream_buf >= 0x10000000) + decoder.mbc++; + mbaCount = 0; + case 3: + while (1) { - mba = MBA_5 + (UBITS(decoder->bitstream_buf, 5) - 2); - break; - } - else if (decoder->bitstream_buf >= 0x03000000) - { - mba = MBA_11 + (UBITS(decoder->bitstream_buf, 11) - 24); - break; - } - else switch (UBITS(decoder->bitstream_buf, 11)) + if (!GETWORD()) { - - case 8: /* macroblock_escape */ - mba_inc += 33; - /* pass through */ - - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11); - NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr); - continue; - - default: /* end of slice/frame, or error? */ - { -#ifdef MPEGHACK - if (!resumed) so_resume(); -#endif - finishmpeg2sliceIDEC(decoder); - - *(int*)pdone = 1; - so_exit(); - } + ipu_cmd.pos[1] = 3; + return false; } - } - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, mba->len); - mba_inc += mba->mba; + code = UBITS(16); + if (code >= 0x1000) + { + mba = MBA_5 + (UBITS(5) - 2); + break; + } + else if (code >= 0x0300) + { + mba = MBA_11 + (UBITS(11) - 24); + break; + } + else switch (UBITS(11)) + { + case 8: /* macroblock_escape */ + mbaCount += 33; + /* pass through */ - if (mba_inc) - { - decoder->dc_dct_pred[0] = - decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS(11); + continue; - do - { - decoder->mbc++; + default: /* end of slice/frame, or error? */ + { + goto finish_idec; + } + } } - while (--mba_inc); + + DUMPBITS(mba->len); + mbaCount += mba->mba; + + if (mbaCount) + { + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + + decoder.mbc += mbaCount; + } + + case 4: + if (!GETWORD()) + { + ipu_cmd.pos[1] = 4; + return false; + } + + break; } + + ipu_cmd.pos[1] = 0; + ipu_cmd.pos[2] = 0; } + +finish_idec: + finishmpeg2sliceIDEC(); + + case 3: + bit8 = 1; + if (!getBits8((u8*)&bit8, 0)) + { + ipu_cmd.pos[0] = 3; + return false; + } + + if (bit8 == 0) + { + if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7); + + ipuRegs->ctrl.SCD = 1; + } + + case 4: + if (!getBits32((u8*)&ipuRegs->top, 0)) + { + ipu_cmd.pos[0] = 4; + return false; + } + + BigEndian(ipuRegs->top, ipuRegs->top); + break; } -#ifdef MPEGHACK - if (!resumed) so_resume(); -#endif - - finishmpeg2sliceIDEC(decoder); - - *(int*)pdone = 1; - so_exit(); + return true; } -void mpeg2_slice(void* pdone) +bool mpeg2_slice() { int DCT_offset, DCT_stride; - //u8 bit8=0; - //u32 fp = g_BP.FP; - u32 bp; - decoder_t * decoder = &g_decoder; - u32 size = 0; + u8 bit8; + u32 size; - *(int*)pdone = 0; - ipuRegs->ctrl.ECD = 0; - - memzero(*decoder->mb8); - memzero(*decoder->mb16); - - bitstream_init(decoder); - - if (decoder->dcr) + switch (ipu_cmd.pos[0]) { - decoder->dc_dct_pred[0] = - decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; - } - - if (decoder->macroblock_modes & DCT_TYPE_INTERLACED) - { - DCT_offset = decoder->stride; - DCT_stride = decoder->stride * 2; - } - else - { - DCT_offset = decoder->stride * 8; - DCT_stride = decoder->stride; - } - - if (decoder->macroblock_modes & MACROBLOCK_INTRA) - { - decoder->coded_block_pattern = 0x3F;//all 6 blocks - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y, DCT_stride); - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride); - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset, DCT_stride); - slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset + 8, DCT_stride); - slice_intra_DCT(decoder, 1, (u8*)decoder->mb8->Cb, decoder->stride >> 1); - slice_intra_DCT(decoder, 2, (u8*)decoder->mb8->Cr, decoder->stride >> 1); - ipu_copy(decoder->mb8, decoder->mb16); - } - else - { - if (decoder->macroblock_modes & MACROBLOCK_PATTERN) + case 0: + if (decoder.dcr) { - decoder->coded_block_pattern = get_coded_block_pattern(decoder); - /* JayteeMaster: changed from mb8 to mb16 and from u8 to s16 */ - - if (decoder->coded_block_pattern & 0x20) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y, DCT_stride); - if (decoder->coded_block_pattern & 0x10) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + 8, DCT_stride); - if (decoder->coded_block_pattern & 0x08) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + DCT_offset, DCT_stride); - if (decoder->coded_block_pattern & 0x04) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + DCT_offset + 8, DCT_stride); - if (decoder->coded_block_pattern & 0x2) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Cb, decoder->stride >> 1); - if (decoder->coded_block_pattern & 0x1) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Cr, decoder->stride >> 1); - + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; } - } - - //Send The MacroBlock via DmaIpuFrom - - size = 0; // Reset - ipuRegs->ctrl.SCD = 0; - coded_block_pattern = decoder->coded_block_pattern; - bp = g_BP.BP; - g_BP.BP += ((int)decoder->bitstream_bits - 16); - - // BP goes from 0 to 128, so negative values mean to read old buffer - // so we minus from 128 to get the correct BP - if ((int)g_BP.BP < 0) - { - g_BP.BP = 128 + (int)g_BP.BP; - - // After BP is positioned correctly, we need to reload the old buffer - // so that reading may continue properly - ReorderBitstream(); - } - - FillInternalBuffer(&g_BP.BP, 1, 0); - - decoder->mbc = 1; - g_nIPU0Data = 48; - g_pIPU0Pointer = (u8*)decoder->mb16; - - while (g_nIPU0Data > 0) - { - size = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data); - - if (size == 0) + + ipuRegs->ctrl.ECD = 0; + ipuRegs->top = 0; + memzero(*decoder.mb8); + memzero(*decoder.mb16); + case 1: + if (!bitstream_init()) { - so_resume(); + ipu_cmd.pos[0] = 1; + return false; + } + + case 2: + ipu_cmd.pos[0] = 2; + + if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) + { + DCT_offset = decoder.stride; + DCT_stride = decoder.stride * 2; } else { - g_pIPU0Pointer += size * 16; - g_nIPU0Data -= size; - } - } - waitForSCD(); - - decoder->bitstream_bits = 0; - *(int*)pdone = 1; - so_exit(); -} - -int __forceinline get_motion_delta(decoder_t * const decoder, - const int f_code) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - - int delta; - int sign; - const MVtab * tab; - - if ((bit_buf & 0x80000000)) - { - DUMPBITS(bit_buf, bits, 1); - return 0x00010000; - } - else if ((bit_buf & 0xf0000000) || ((bit_buf & 0xfc000000) == 0x0c000000)) - { - - tab = MV_4 + UBITS(bit_buf, 4); - delta = (tab->delta << f_code) + 1; - bits += tab->len + f_code + 1; - bit_buf <<= tab->len; - - sign = SBITS(bit_buf, 1); - bit_buf <<= 1; - - if (f_code) delta += UBITS(bit_buf, f_code); - - bit_buf <<= f_code; - - return (delta ^ sign) - sign; - - } - else - { - tab = MV_10 + UBITS(bit_buf, 10); - delta = (tab->delta << f_code) + 1; - bits += tab->len + 1; - bit_buf <<= tab->len; - - sign = SBITS(bit_buf, 1); - bit_buf <<= 1; - - if (f_code) - { - NEEDBITS(bit_buf, bits, bit_ptr); - delta += UBITS(bit_buf, f_code); - DUMPBITS(bit_buf, bits, f_code); + DCT_offset = decoder.stride * 8; + DCT_stride = decoder.stride; } - return (delta ^ sign) - sign; - - } - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -int __forceinline get_dmv(decoder_t * const decoder) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - - const DMVtab * tab; - - tab = DMV_2 + UBITS(bit_buf, 2); - DUMPBITS(bit_buf, bits, tab->len); - return tab->dmv; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -int get_macroblock_address_increment(decoder_t * const decoder) -{ - const MBAtab *mba; - - if (decoder->bitstream_buf >= 0x10000000) - mba = MBA_5 + (UBITS(decoder->bitstream_buf, 5) - 2); - else if (decoder->bitstream_buf >= 0x03000000) - mba = MBA_11 + (UBITS(decoder->bitstream_buf, 11) - 24); - else switch (UBITS(decoder->bitstream_buf, 11)) + if (decoder.macroblock_modes & MACROBLOCK_INTRA) { - - case 8: /* macroblock_escape */ - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11); - return 0x23; - - case 15: /* macroblock_stuffing (MPEG1 only) */ - if (decoder->mpeg1) + switch(ipu_cmd.pos[1]) + { + case 0: + decoder.coded_block_pattern = 0x3F; + case 1: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[1] == 1)) { - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11); - return 0x22; + ipu_cmd.pos[1] = 1; + return false; } + case 2: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) + { + ipu_cmd.pos[1] = 2; + return false; + } + case 3: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) + { + ipu_cmd.pos[1] = 3; + return false; + } + case 4: + if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) + { + ipu_cmd.pos[1] = 4; + return false; + } + case 5: + if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5)) + { + ipu_cmd.pos[1] = 5; + return false; + } + case 6: + if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6)) + { + ipu_cmd.pos[1] = 6; + return false; + } + break; + } - default: - return 0;//error + ipu_copy(decoder.mb8, decoder.mb16); + } + else + { + if (decoder.macroblock_modes & MACROBLOCK_PATTERN) + { + switch(ipu_cmd.pos[1]) + { + case 0: + decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits + case 1: + if (decoder.coded_block_pattern & 0x20) + { + if (!slice_non_intra_DCT((s16*)decoder.mb16->Y, DCT_stride, ipu_cmd.pos[1] == 1)) + { + ipu_cmd.pos[1] = 1; + return false; + } + } + case 2: + if (decoder.coded_block_pattern & 0x10) + { + if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) + { + ipu_cmd.pos[1] = 2; + return false; + } + } + case 3: + if (decoder.coded_block_pattern & 0x08) + { + if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) + { + ipu_cmd.pos[1] = 3; + return false; + } + } + case 4: + if (decoder.coded_block_pattern & 0x04) + { + if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) + { + ipu_cmd.pos[1] = 4; + return false; + } + } + case 5: + if (decoder.coded_block_pattern & 0x2) + { + if (!slice_non_intra_DCT((s16*)decoder.mb16->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5)) + { + ipu_cmd.pos[1] = 5; + return false; + } + } + case 6: + if (decoder.coded_block_pattern & 0x1) + { + if (!slice_non_intra_DCT((s16*)decoder.mb16->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6)) + { + ipu_cmd.pos[1] = 6; + return false; + } + } + break; + } + } } - DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, mba->len); + //Send The MacroBlock via DmaIpuFrom + size = 0; // Reset + ipuRegs->ctrl.SCD = 0; + coded_block_pattern = decoder.coded_block_pattern; + g_BP.BP += (int)decoder.bitstream_bits - 16; - return mba->mba + 1; -} + // BP goes from 0 to 128, so negative values mean to read old buffer + // so we minus from 128 to get the correct BP + if ((int)g_BP.BP < 0) + { + g_BP.BP = 128 + (int)g_BP.BP; + + // After BP is positioned correctly, we need to reload the old buffer + // so that reading may continue properly + ReorderBitstream(); + } + + decoder.mbc = 1; + g_nIPU0Data = 48; + g_pIPU0Pointer = (u8*)decoder.mb16; + + case 3: + while (g_nIPU0Data > 0) + { + size = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data); + + if (size == 0) + { + ipu_cmd.pos[0] = 3; + return false; + } + else + { + g_pIPU0Pointer += size * 16; + g_nIPU0Data -= size; + } + } + + case 4: + bit8 = 1; + if (!getBits8((u8*)&bit8, 0)) + { + ipu_cmd.pos[0] = 4; + return false; + } + + if (bit8 == 0) + { + if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7); + + ipuRegs->ctrl.SCD = 1; + } + + case 5: + if (!getBits32((u8*)&ipuRegs->top, 0)) + { + ipu_cmd.pos[0] = 5; + return false; + } + + BigEndian(ipuRegs->top, ipuRegs->top); + decoder.bitstream_bits = 0; + break; + } + + return true; +} \ No newline at end of file diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h index 3c8cb79e11..2860e4f53b 100644 --- a/pcsx2/IPU/mpeg2lib/Mpeg.h +++ b/pcsx2/IPU/mpeg2lib/Mpeg.h @@ -99,7 +99,6 @@ struct decoder_t { /* bit parsing stuff */ u32 bitstream_buf; /* current 32 bit working set */ int bitstream_bits; /* used bits in working set */ - u8 * bitstream_ptr; /* buffer with stream data; 128 bits buffer */ struct macroblock_8 *mb8; struct macroblock_16 *mb16; @@ -173,13 +172,13 @@ extern void (__fastcall *mpeg2_idct_add) (int last, s16 * block, s16* dest, int #define IDEC 0 #define BDEC 1 -void mpeg2sliceIDEC(void* pdone); -void mpeg2_slice(void* pdone); -int get_macroblock_address_increment(decoder_t * const decoder); -int get_macroblock_modes (decoder_t * const decoder); +bool mpeg2sliceIDEC(); +bool mpeg2_slice(); +int get_macroblock_address_increment(); +int get_macroblock_modes(); -extern int get_motion_delta (decoder_t * const decoder, const int f_code); -extern int get_dmv (decoder_t * const decoder); +extern int get_motion_delta(const int f_code); +extern int get_dmv(); extern int non_linear_quantizer_scale[]; extern decoder_t g_decoder; @@ -189,7 +188,7 @@ void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb1 void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4); void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16); -int slice (decoder_t * const decoder, u8 * buffer); +int slice (u8 * buffer); /* idct.c */ void mpeg2_idct_init (); @@ -199,4 +198,10 @@ void mpeg2_idct_init (); #define BigEndian(out, in) out = __builtin_bswap32(in) // or we could use the asm function bswap... #endif +#ifdef _MSC_VER +#define BigEndian64(out, in) out = _byteswap_uint64(in) +#else +#define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap... +#endif + #endif//__MPEG_H__ diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h index 4867b2175c..69727beac6 100644 --- a/pcsx2/IPU/mpeg2lib/Vlc.h +++ b/pcsx2/IPU/mpeg2lib/Vlc.h @@ -25,55 +25,70 @@ #ifndef __VLC_H__ #define __VLC_H__ -#include "IPU/coroutine.h" - static u8 data[2]; -static u8 dword[4]; +//static u8 word[4]; +//static u8 dword[8]; +//static u8 qword[16]; extern tIPU_BP g_BP; -extern decoder_t g_decoder; +extern decoder_t decoder; extern void ReorderBitstream(); -static __forceinline void GETWORD(u32 * bit_buf,int bits) +static __forceinline int GETWORD() { - while(!getBits16(data,1)) + if (decoder.bitstream_bits > 0) { - so_resume(); + if(!getBits16(data,1)) + { + return 0; + } + + /*u32 data; + BigEndian(data, *(u32*)word); + decoder.bitstream_buf |= (u64)data << decoder.bitstream_bits; + decoder.bitstream_bits -= 32;*/ + decoder.bitstream_buf |= ((u32)(((u16)data[0] << 8) | data[1])) << decoder.bitstream_bits; + decoder.bitstream_bits -= 16; } - *bit_buf |= ((data[0] << 8) | data[1]) << (bits); + + return 1; } -static __forceinline void bitstream_init (decoder_t * decoder){ - decoder->bitstream_bits = -16; +static __forceinline int bitstream_init () +{ + if (!getBits32((u8*)&decoder.bitstream_buf, 1)) + { + return 0; + } - while( !getBits32(dword, 1) ) - so_resume(); + decoder.bitstream_bits = -16; + BigEndian(decoder.bitstream_buf, decoder.bitstream_buf); + /*decoder.bitstream_buf = *(u64*)dword; + BigEndian64(decoder.bitstream_buf, decoder.bitstream_buf);*/ - decoder->bitstream_buf = (dword[0] << 24) | (dword[1] << 16) | - (dword[2] << 8) |dword[3]; + return 1; } -/* make sure that there are at least 16 valid bits in bit_buf */ -#define NEEDBITS(bit_buf,bits,bit_ptr) \ -do { \ - if (bits > 0) { \ - GETWORD(&bit_buf,bits); \ - bits -= 16; \ - } \ -} while (0) - /* remove num valid bits from bit_buf */ -#define DUMPBITS(bit_buf,bits,num) \ -do { \ - /*IPU_LOG("DUMPBITS %d\n",num);*/ \ - bit_buf <<= (num); \ - bits += (num); \ -} while (0) +static __forceinline void DUMPBITS(int num) +{ + decoder.bitstream_buf <<= num; + decoder.bitstream_bits += num; +} /* take num bits from the high part of bit_buf and zero extend them */ -#define UBITS(bit_buf,num) (((u32)(bit_buf)) >> (32 - (num))) +#define UBITS(num) (((u32)decoder.bitstream_buf) >> (32 - (num))) /* take num bits from the high part of bit_buf and sign extend them */ -#define SBITS(bit_buf,num) (((s32)(bit_buf)) >> (32 - (num))) +#define SBITS(num) (((s32)decoder.bitstream_buf) >> (32 - (num))) + +/* Get bits from bitstream */ +static __forceinline u32 GETBITS(int num) +{ + u16 retVal = UBITS(num); + DUMPBITS(num); + + return retVal; +} struct MBtab { u8 modes; @@ -443,4 +458,247 @@ static const MBAtab MBA_11 [] = { { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} }; + +// New + + +/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */ +static MBAtab MBAtab1[16] = +{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4}, + {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3} +}; + +/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx */ +static MBAtab MBAtab2[104] = +{ + {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11}, + {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10}, + {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10}, + {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, + {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, + {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, + {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, + {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, + {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, + {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, + {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, + {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, + {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7} +}; + +/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */ +static const DCtab DClumtab0[32] = +{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} +}; + +/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */ +static const DCtab DClumtab1[16] = +{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} +}; + +/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */ +static const DCtab DCchromtab0[32] = +{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} +}; + +/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */ +static const DCtab DCchromtab1[32] = +{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, + {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} +}; + +/* Table B-14, DCT coefficients table zero, + * codes 0100 ... 1xxx (used for first (DC) coefficient) + */ +static const DCTtab DCTtabfirst[12] = +{ + {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3}, + {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, + {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} +}; + +/* Table B-14, DCT coefficients table zero, + * codes 0100 ... 1xxx (used for all other coefficients) + */ +static const DCTtab DCTtabnext[12] = +{ + {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3}, + {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */ + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2} +}; + +/* Table B-14, DCT coefficients table zero, + * codes 000001xx ... 00111xxx + */ +static const DCTtab DCTtab0[60] = +{ + {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */ + {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7}, + {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7}, + {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, + {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6}, + {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6}, + {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6}, + {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8}, + {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8}, + {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5}, + {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5}, + {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, + {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, + {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, + {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} +}; + +/* Table B-15, DCT coefficients table one, + * codes 000001xx ... 11111111 +*/ +static const DCTtab DCTtab0a[252] = +{ + {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */ + {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7}, + {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7}, + {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, + {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6}, + {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6}, + {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6}, + {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8}, + {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8}, + {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5}, + {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5}, + {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5}, + {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5}, + {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, + {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, + {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */ + {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, + {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, + {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, + {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, + {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, + {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, + {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, + {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5}, + {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5}, + {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5}, + {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5}, + {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7}, + {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7}, + {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8}, + {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} +}; + +/* Table B-14, DCT coefficients table zero, + * codes 0000001000 ... 0000001111 + */ +static const DCTtab DCTtab1[8] = +{ + {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10}, + {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10} +}; + +/* Table B-15, DCT coefficients table one, + * codes 000000100x ... 000000111x + */ +static const DCTtab DCTtab1a[8] = +{ + {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9}, + {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9} +}; + +/* Table B-14/15, DCT coefficients table zero / one, + * codes 000000010000 ... 000000011111 + */ +static const DCTtab DCTtab2[16] = +{ + {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12}, + {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12}, + {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12}, + {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} +}; + +/* Table B-14/15, DCT coefficients table zero / one, + * codes 0000000010000 ... 0000000011111 + */ +static const DCTtab DCTtab3[16] = +{ + {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13}, + {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13}, + {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13}, + {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} +}; + +/* Table B-14/15, DCT coefficients table zero / one, + * codes 00000000010000 ... 00000000011111 + */ +static const DCTtab DCTtab4[16] = +{ + {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14}, + {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14}, + {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14}, + {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} +}; + +/* Table B-14/15, DCT coefficients table zero / one, + * codes 000000000010000 ... 000000000011111 + */ +static const DCTtab DCTtab5[16] = +{ + {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15}, + {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15}, + {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15}, + {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} +}; + +/* Table B-14/15, DCT coefficients table zero / one, + * codes 0000000000010000 ... 0000000000011111 + */ +static const DCTtab DCTtab6[16] = +{ + {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16}, + {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16}, + {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16}, + {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} +}; + #endif//__VLC_H__ diff --git a/pcsx2/NakedAsm.h b/pcsx2/NakedAsm.h index c14e6e7d6b..0446e243c8 100644 --- a/pcsx2/NakedAsm.h +++ b/pcsx2/NakedAsm.h @@ -17,17 +17,6 @@ #ifndef NAKED_ASM_H #define NAKED_ASM_H -#include "IPU/coroutine.h" - -// Common to Windows and Linux -extern "C" -{ - // acoroutine.S - void so_call(coroutine_t coro); - void so_resume(void); - void so_exit(void); -} - #ifdef __LINUX__ extern "C" diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 9c67ac3571..f76dc73072 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -1254,14 +1254,6 @@ - - - - @@ -1270,7 +1262,7 @@ >