diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp
index 10593fbd2c..7b422b6c8d 100644
--- a/pcsx2/IPU/IPU.cpp
+++ b/pcsx2/IPU/IPU.cpp
@@ -36,12 +36,9 @@
// IPU Inline'd IRQs : Calls the IPU interrupt handlers directly instead of
// feeding them through the EE's branch test. (see IPU.h for details)
-
-
-
-static tIPU_DMA g_nDMATransfer(0);
-static tIPU_cmd ipu_cmd;
-static IPUStatus IPU1Status;
+tIPU_DMA g_nDMATransfer(0);
+tIPU_cmd ipu_cmd;
+IPUStatus IPU1Status;
// FIXME - g_nIPU0Data and Pointer are not saved in the savestate, which breaks savestates for some
// FMVs at random (if they get saved during the half frame of a 30fps rate). The fix is complicated
@@ -53,9 +50,6 @@ void ReorderBitstream();
// the BP doesn't advance and returns -1 if there is no data to be read
tIPU_BP g_BP;
-static coroutine_t s_routine; // used for executing BDEC/IDEC
-static int s_RoutineDone = 0;
-static u32 s_tempstack[0x4000]; // 64k
void IPUWorker();
@@ -78,7 +72,7 @@ __aligned16 macroblock_rgb16 rgb16;
u8 indx4[16*16/2];
bool mpeg2_inited = false; //mpeg2_idct_init() must be called only once
u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'};
-decoder_t g_decoder; //static, only to place it in bss
+decoder_t decoder; //static, only to place it in bss
decoder_t tempdec;
extern "C"
@@ -98,14 +92,14 @@ __forceinline void IPUProcessInterrupt()
void init_g_decoder()
{
//other stuff
- g_decoder.intra_quantizer_matrix = (u8*)iq;
- g_decoder.non_intra_quantizer_matrix = (u8*)niq;
- g_decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
- g_decoder.mb8 = &mb8;
- g_decoder.mb16 = &mb16;
- g_decoder.rgb32 = &rgb32;
- g_decoder.rgb16 = &rgb16;
- g_decoder.stride = 16;
+ decoder.intra_quantizer_matrix = (u8*)iq; // iq / niq are the tables ipuSETIQ reads from the FIFO
+ decoder.non_intra_quantizer_matrix = (u8*)niq;
+ decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
+ decoder.mb8 = &mb8; // the decoder works directly on the file-level macroblock buffers
+ decoder.mb16 = &mb16;
+ decoder.rgb32 = &rgb32;
+ decoder.rgb16 = &rgb16;
+ decoder.stride = 16;
}
void mpeg2_init()
@@ -159,7 +153,7 @@ void ReportIPU()
Console.WriteLn("vqclut = 0x%x.", vqclut);
Console.WriteLn("s_thresh = 0x%x.", s_thresh);
Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern);
- Console.WriteLn("g_decoder = 0x%x.", g_decoder);
+ Console.WriteLn("g_decoder = 0x%x.", decoder);
Console.WriteLn("mpeg2: scan_norm = 0x%x, alt = 0x%x.", mpeg2_scan_norm, mpeg2_scan_alt);
Console.WriteLn(ipu_cmd.desc());
Console.WriteLn("_readbits = 0x%x. readbits - _readbits, which is also frozen, is 0x%x.",
@@ -186,7 +180,7 @@ void SaveStateBase::ipuFreeze()
Freeze(vqclut);
Freeze(s_thresh);
Freeze(coded_block_pattern);
- Freeze(g_decoder);
+ Freeze(decoder);
Freeze(mpeg2_scan_norm);
Freeze(mpeg2_scan_alt);
@@ -377,72 +371,67 @@ static void ipuBCLR(u32 val)
IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP);
}
-static BOOL ipuIDEC(u32 val)
+static BOOL ipuIDEC(u32 val, bool resume) // resume: false from IPUCMD_WRITE (new command), true when IPUWorker re-enters
{
tIPU_CMD_IDEC idec(val);
- idec.log();
- g_BP.BP += idec.FB;//skip FB bits
- //from IPU_CTRL
- ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;)
- g_decoder.coding_type = ipuRegs->ctrl.PCT;
- g_decoder.mpeg1 = ipuRegs->ctrl.MP1;
- g_decoder.q_scale_type = ipuRegs->ctrl.QST;
- g_decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
- g_decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
- g_decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
+ if (!resume) // one-time setup for a freshly written command; a resumed call goes straight to the decoder
+ {
+ idec.log();
+ g_BP.BP += idec.FB;//skip FB bits
+ //from IPU_CTRL
+ ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;)
+ decoder.coding_type = ipuRegs->ctrl.PCT;
+ decoder.mpeg1 = ipuRegs->ctrl.MP1;
+ decoder.q_scale_type = ipuRegs->ctrl.QST;
+ decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
+ decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
+ decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
- //from IDEC value
- g_decoder.quantizer_scale = idec.QSC;
- g_decoder.frame_pred_frame_dct = !idec.DTD;
- g_decoder.sgn = idec.SGN;
- g_decoder.dte = idec.DTE;
- g_decoder.ofm = idec.OFM;
+ //from IDEC value
+ decoder.quantizer_scale = idec.QSC;
+ decoder.frame_pred_frame_dct = !idec.DTD;
+ decoder.sgn = idec.SGN;
+ decoder.dte = idec.DTE;
+ decoder.ofm = idec.OFM;
- //other stuff
- g_decoder.dcr = 1; // resets DC prediction value
+ //other stuff
+ decoder.dcr = 1; // resets DC prediction value
+ }
- s_routine = so_create(mpeg2sliceIDEC, &s_RoutineDone, s_tempstack, sizeof(s_tempstack));
- pxAssert(s_routine != NULL);
- so_call(s_routine);
- if (s_RoutineDone) s_routine = NULL;
-
- return s_RoutineDone;
+ return mpeg2sliceIDEC(); // callers treat nonzero as "IDEC done" and zero as "call again with resume = true"
}
static int s_bdec = 0;
-static __forceinline BOOL ipuBDEC(u32 val)
+static __forceinline BOOL ipuBDEC(u32 val, bool resume) // resume: false from IPUCMD_WRITE, true when IPUWorker re-enters
{
tIPU_CMD_BDEC bdec(val);
- bdec.log(s_bdec);
- if (IsDebugBuild) s_bdec++;
+ if (!resume) // one-time setup for a freshly written command; skipped on re-entry so decoder state is kept
+ {
+ bdec.log(s_bdec);
+ if (IsDebugBuild) s_bdec++;
- g_BP.BP += bdec.FB;//skip FB bits
- g_decoder.coding_type = I_TYPE;
- g_decoder.mpeg1 = ipuRegs->ctrl.MP1;
- g_decoder.q_scale_type = ipuRegs->ctrl.QST;
- g_decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
- g_decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
- g_decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
+ g_BP.BP += bdec.FB;//skip FB bits
+ decoder.coding_type = I_TYPE;
+ decoder.mpeg1 = ipuRegs->ctrl.MP1;
+ decoder.q_scale_type = ipuRegs->ctrl.QST;
+ decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
+ decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
+ decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
- //from BDEC value
- /* JayteeMaster: the quantizer (linear/non linear) depends on the q_scale_type */
- g_decoder.quantizer_scale = g_decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
- g_decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
- g_decoder.dcr = bdec.DCR;
- g_decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
+ //from BDEC value
+ decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1; // table lookup when q_scale_type is set, else QSC * 2
+ decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
+ decoder.dcr = bdec.DCR;
+ decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
- memzero(mb8);
- memzero(mb16);
+ memzero(mb8); // cleared only on the first call, never on a resume
+ memzero(mb16);
+ }
- s_routine = so_create(mpeg2_slice, &s_RoutineDone, s_tempstack, sizeof(s_tempstack));
- pxAssert(s_routine != NULL);
- so_call(s_routine);
-
- if (s_RoutineDone) s_routine = NULL;
- return s_RoutineDone;
+ return mpeg2_slice(); // callers treat nonzero as "BDEC done" and zero as "call again with resume = true"
}
static BOOL __fastcall ipuVDEC(u32 val)
@@ -451,34 +440,34 @@ static BOOL __fastcall ipuVDEC(u32 val)
{
case 0:
ipuRegs->cmd.DATA = 0;
- if (!getBits32((u8*)&g_decoder.bitstream_buf, 0)) return FALSE;
+ if (!getBits32((u8*)&decoder.bitstream_buf, 0)) return FALSE;
- g_decoder.bitstream_bits = -16;
- BigEndian(g_decoder.bitstream_buf, g_decoder.bitstream_buf);
+ decoder.bitstream_bits = -16;
+ BigEndian(decoder.bitstream_buf, decoder.bitstream_buf);
switch ((val >> 26) & 3)
{
case 0://Macroblock Address Increment
- g_decoder.mpeg1 = ipuRegs->ctrl.MP1;
- ipuRegs->cmd.DATA = get_macroblock_address_increment(&g_decoder);
+ decoder.mpeg1 = ipuRegs->ctrl.MP1;
+ ipuRegs->cmd.DATA = get_macroblock_address_increment();
break;
- case 1://Macroblock Type //known issues: no error detected
- g_decoder.frame_pred_frame_dct = 1;//prevent DCT_TYPE_INTERLACED
- g_decoder.coding_type = ipuRegs->ctrl.PCT;
- ipuRegs->cmd.DATA = get_macroblock_modes(&g_decoder);
+ case 1://Macroblock Type
+ decoder.frame_pred_frame_dct = 1;
+ decoder.coding_type = ipuRegs->ctrl.PCT;
+ ipuRegs->cmd.DATA = get_macroblock_modes();
break;
- case 2://Motion Code //known issues: no error detected
- ipuRegs->cmd.DATA = get_motion_delta(&g_decoder, 0);
+ case 2://Motion Code
+ ipuRegs->cmd.DATA = get_motion_delta(0);
break;
case 3://DMVector
- ipuRegs->cmd.DATA = get_dmv(&g_decoder);
+ ipuRegs->cmd.DATA = get_dmv();
break;
}
- g_BP.BP += (g_decoder.bitstream_bits + 16);
+ g_BP.BP += (int)decoder.bitstream_bits + 16;
if ((int)g_BP.BP < 0)
{
@@ -486,9 +475,7 @@ static BOOL __fastcall ipuVDEC(u32 val)
ReorderBitstream();
}
- FillInternalBuffer(&g_BP.BP, 1, 0);
-
- ipuRegs->cmd.DATA = (ipuRegs->cmd.DATA & 0xFFFF) | ((g_decoder.bitstream_bits + 16) << 16);
+ ipuRegs->cmd.DATA = (ipuRegs->cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16);
ipuRegs->ctrl.ECD = (ipuRegs->cmd.DATA == 0);
case 1:
@@ -529,7 +516,10 @@ static BOOL ipuSETIQ(u32 val)
if ((val >> 27) & 1)
{
- ipu_cmd.pos[0] += getBits((u8*)niq + ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1); // 8*8*8
+ for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
+ {
+ if (!getBits64((u8*)niq + 8 * ipu_cmd.pos[0], 1)) return FALSE;
+ }
IPU_LOG("Read non-intra quantization matrix from IPU FIFO.");
for (i = 0; i < 8; i++)
@@ -541,7 +531,10 @@ static BOOL ipuSETIQ(u32 val)
}
else
{
- ipu_cmd.pos[0] += getBits((u8*)iq + 8 * ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1);
+ for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
+ {
+ if (!getBits64((u8*)iq + 8 * ipu_cmd.pos[0], 1)) return FALSE;
+ }
IPU_LOG("Read intra quantization matrix from IPU FIFO.");
for (i = 0; i < 8; i++)
@@ -552,40 +545,40 @@ static BOOL ipuSETIQ(u32 val)
}
}
- return ipu_cmd.pos[0] == 64;
+ return TRUE;
}
static BOOL ipuSETVQ(u32 val)
{
- ipu_cmd.pos[0] += getBits((u8*)vqclut + ipu_cmd.pos[0], 256 - 8 * ipu_cmd.pos[0], 1); // 16*2*8
-
- if (ipu_cmd.pos[0] == 32)
+ for(;ipu_cmd.pos[0] < 4; ipu_cmd.pos[0]++) // 4 x 64 bits = the 256-bit VQCLUT; pos[0] is not reset here, so a later call continues at the pending qword
{
- IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO.");
- IPU_LOG(
- "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
- "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d"
- "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
- "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
- vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
- vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
- vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
- vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
- vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
- vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
- vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
- vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
- vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
- vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
- vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
- vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
- vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
- vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
- vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
- vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
+ if (!getBits64((u8*)vqclut + 8 * ipu_cmd.pos[0], 1)) return FALSE; // not enough input yet: leave pos[0] at the unread qword
}
- return ipu_cmd.pos[0] == 32;
+ IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO.");
+ IPU_LOG(
+ "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
+ "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d"
+ "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
+ "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
+ vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
+ vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
+ vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
+ vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
+ vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
+ vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
+ vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
+ vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
+ vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
+ vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
+ vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
+ vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
+ vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
+ vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
+ vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
+ vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
+
+ return TRUE; // all four qwords were read and the table was logged
}
// IPU Transfers are split into 8Qwords so we need to send ALL the data
@@ -596,17 +589,14 @@ static BOOL __fastcall ipuCSC(u32 val)
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
{
-
- if (ipu_cmd.pos[0] < 3072 / 8)
+ for(;ipu_cmd.pos[0] < 48; ipu_cmd.pos[0]++)
{
- ipu_cmd.pos[0] += getBits((u8*) & mb8 + ipu_cmd.pos[0], 3072 - 8 * ipu_cmd.pos[0], 1);
-
- if (ipu_cmd.pos[0] < 3072 / 8) return FALSE;
-
- ipu_csc(&mb8, &rgb32, 0);
- if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE);
+ if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
+ ipu_csc(&mb8, &rgb32, 0);
+ if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE);
+
if (csc.OFM)
{
while (ipu_cmd.pos[1] < 32)
@@ -641,18 +631,16 @@ static BOOL ipuPACK(u32 val)
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
{
- if (ipu_cmd.pos[0] < 512)
+ for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
{
- ipu_cmd.pos[0] += getBits((u8*) & mb8 + ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1);
-
- if (ipu_cmd.pos[0] < 64) return FALSE;
-
- ipu_csc(&mb8, &rgb32, 0);
- ipu_dither(&rgb32, &rgb16, csc.DTE);
-
- if (csc.OFM) ipu_vq(&rgb16, indx4);
+ if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
+ ipu_csc(&mb8, &rgb32, 0);
+ ipu_dither(&rgb32, &rgb16, csc.DTE);
+
+ if (csc.OFM) ipu_vq(&rgb16, indx4);
+
if (csc.OFM)
{
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
@@ -696,7 +684,7 @@ void IPUCMD_WRITE(u32 val)
ipuRegs->ctrl.ECD = 0;
ipuRegs->ctrl.SCD = 0; //clear ECD/SCD
ipuRegs->cmd.DATA = val;
- ipu_cmd.pos[0] = 0;
+ ipu_cmd.clear();
switch (ipuRegs->cmd.CMD)
{
@@ -759,29 +747,27 @@ void IPUCMD_WRITE(u32 val)
break;
case SCE_IPU_IDEC:
- if (ipuIDEC(val))
+ if (ipuIDEC(val, false))
{
// idec done, ipu0 done too
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
return;
}
+
ipuRegs->topbusy = 0x80000000;
- // have to resort to the thread
- ipu_cmd.current = val >> 28;
- ipuRegs->ctrl.BUSY = 1;
- return;
+ break;
case SCE_IPU_BDEC:
- if (ipuBDEC(val))
+ if (ipuBDEC(val, false))
{
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
}
- ipuRegs->topbusy = 0x80000000;
- ipu_cmd.current = val >> 28;
- ipuRegs->ctrl.BUSY = 1;
- return;
+ else
+ {
+ ipuRegs->topbusy = 0x80000000;
+ }
}
// have to resort to the thread
@@ -850,8 +836,7 @@ void IPUWorker()
break;
case SCE_IPU_IDEC:
- so_call(s_routine);
- if (!s_RoutineDone)
+ if (!ipuIDEC(ipuRegs->cmd.DATA, true))
{
if(ipu1dma->chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
@@ -865,12 +850,10 @@ void IPUWorker()
// CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
- s_routine = NULL;
break;
case SCE_IPU_BDEC:
- so_call(s_routine);
- if (!s_RoutineDone)
+ if (!ipuBDEC(ipuRegs->cmd.DATA, true))
{
if(ipu1dma->chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
@@ -882,7 +865,6 @@ void IPUWorker()
ipu_cmd.current = 0xffffffff;
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
- s_routine = NULL;
if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
@@ -946,7 +928,7 @@ u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size)
g_BP.FP = 1;
}
- if ((g_BP.FP < 2) && (*(int*)pointer + size) >= 128)
+ if ((g_BP.FP < 2) && ((*(int*)pointer + size) >= 128))
{
if (ipu_fifo.in.read(next_readbits())) g_BP.FP += 1;
}
@@ -967,6 +949,83 @@ u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size)
return (g_BP.FP >= 1) ? g_BP.FP * 128 - (*(int*)pointer) : 0;
}
+// Reads 128 bits from the IPU bitstream at bit position g_BP.BP into address (16 bytes). Returns 1 on
+// success, 0 if fewer than 128 bits are available; BP is advanced by 128 only when advance is nonzero.
+u8 __fastcall getBits128(u8 *address, u32 advance)
+{
+ u64 mask2;
+ u128 mask;
+ u32 shift;
+ u8* readpos;
+
+ // FillInternalBuffer returns the bits still buffered past BP (FP * 128 - BP, or 0); all 128 are needed.
+ if (FillInternalBuffer(&g_BP.BP, 1, 128) < 128) return 0;
+
+ readpos = readbits + (int)g_BP.BP / 8; // byte holding the first requested bit
+
+ if (g_BP.BP & 7)
+ {
+ shift = g_BP.BP & 7;
+ mask2 = 0xff >> shift; // low (8 - shift) bits of one byte: the part taken from the current byte
+ mask.lo = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
+ mask.hi = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
+ // Each output byte = ((current byte & mask) << shift) | ((next byte & ~mask) >> (8 - shift)).
+ u128 notMask;
+ u128 data = *(u128*)(readpos + 1); // supplies the "next byte" for every byte lane
+ notMask.lo = ~mask.lo & data.lo;
+ notMask.hi = ~mask.hi & data.hi;
+ notMask.lo >>= 8 - shift;
+ // No carry between hi and lo: the masked bits never leave their own byte (same merge as getBits64).
+ notMask.hi >>= 8 - shift;
+
+ mask.hi = ((*(u128*)readpos).hi & mask.hi) << shift;
+ mask.lo = ((*(u128*)readpos).lo & mask.lo) << shift;
+
+ notMask.lo |= mask.lo;
+ notMask.hi |= mask.hi;
+ *(u128*)address = notMask;
+ }
+ else
+ {
+ *(u128*)address = *(u128*)readpos; // byte-aligned BP: plain 16-byte copy
+ }
+
+ if (advance) g_BP.BP += 128;
+
+ return 1;
+}
+
+// Reads 64 bits from the IPU bitstream at bit position g_BP.BP into address (8 bytes). Returns 1 on
+// success, 0 if fewer than 64 bits are available; BP is advanced by 64 only when advance is nonzero.
+u8 __fastcall getBits64(u8 *address, u32 advance)
+{
+ register u64 mask = 0;
+ int shift = 0;
+ u8* readpos;
+
+ // FillInternalBuffer returns the bits still buffered past BP (FP * 128 - BP, or 0); 64 are needed.
+ if (FillInternalBuffer(&g_BP.BP, 1, 64) < 64) return 0;
+
+ readpos = readbits + (int)g_BP.BP / 8; // byte holding the first requested bit
+
+ if (g_BP.BP & 7)
+ {
+ shift = g_BP.BP & 7;
+ mask = (0xff >> shift); // low (8 - shift) bits of one byte
+ mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56);
+ // Per output byte: the high bits come from the current byte, the low bits from the next byte.
+ *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift);
+ }
+ else
+ {
+ *(u64*)address = *(u64*)readpos; // byte-aligned BP: plain 8-byte copy
+ }
+
+ if (advance) g_BP.BP += 64;
+
+ return 1;
+}
+
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
u8 __fastcall getBits32(u8 *address, u32 advance)
@@ -1053,102 +1112,6 @@ u8 __fastcall getBits8(u8 *address, u32 advance)
return 1;
}
-int __fastcall getBits(u8 *address, u32 size, u32 advance)
-{
- register u32 mask = 0, shift = 0, howmuch;
- u8* oldbits, *oldaddr = address;
- u32 pointer = 0, temp;
-
- // Check if the current BP has exceeded or reached the limit of 128
- if (FillInternalBuffer(&g_BP.BP, 1, 8) < 8) return 0;
-
- oldbits = readbits;
- // Backup the current BP in case of VDEC/FDEC
- pointer = g_BP.BP;
-
- if (pointer & 7)
- {
- address--;
- while (size)
- {
- if (shift == 0)
- {
- *++address = 0;
- shift = 8;
- }
-
- temp = shift; // Lets not pass a register to min.
- howmuch = min(min(8 - (pointer & 7), 128 - pointer), min(size, temp));
-
- if (FillInternalBuffer(&pointer, advance, 8) < 8)
- {
- if (advance) g_BP.BP = pointer;
- return address - oldaddr;
- }
-
- mask = ((0xFF >> (pointer & 7)) << (8 - howmuch - (pointer & 7))) & 0xFF;
- mask &= readbits[((pointer) >> 3)];
- mask >>= 8 - howmuch - (pointer & 7);
- pointer += howmuch;
- size -= howmuch;
- shift -= howmuch;
- *address |= mask << shift;
- }
- ++address;
- }
- else
- {
- u8* readmem;
- while (size)
- {
- if (FillInternalBuffer(&pointer, advance, 8) < 8)
- {
- if (advance) g_BP.BP = pointer;
- return address -oldaddr;
- }
-
- howmuch = min(128 - pointer, size);
- size -= howmuch;
-
- readmem = readbits + (pointer >> 3);
- pointer += howmuch;
- howmuch >>= 3;
-
- while (howmuch >= 4)
- {
- *(u32*)address = *(u32*)readmem;
- howmuch -= 4;
- address += 4;
- readmem += 4;
- }
-
- switch (howmuch)
- {
- case 3:
- address[2] = readmem[2];
- case 2:
- address[1] = readmem[1];
- case 1:
- address[0] = readmem[0];
- case 0:
- break;
-
- jNO_DEFAULT
- }
-
- address += howmuch;
- }
- }
-
- // If not advance then reset the Reading buffer value
- if (advance)
- g_BP.BP = pointer;
- else
- readbits = oldbits; // restore the last pointer
-
- return address - oldaddr;
-}
-
///////////////////// CORE FUNCTIONS /////////////////
void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V,
const int Src_BpS, const int Width, const int Height);
@@ -1244,7 +1207,7 @@ static __forceinline void ipuDmacSrcChain()
{
case TAG_REFE: // refe
//if(IPU1Status.InProgress == false) ipu1dma->tadr += 16;
- if(IPU1Status.DMAFinished == false) IPU1Status.DMAFinished = true;
+ IPU1Status.DMAFinished = true;
break;
case TAG_CNT: // cnt
// Set the taddr to the next tag
@@ -1264,7 +1227,7 @@ static __forceinline void ipuDmacSrcChain()
case TAG_END: // end
ipu1dma->tadr = ipu1dma->madr;
- if(IPU1Status.DMAFinished == false) IPU1Status.DMAFinished = true;
+ IPU1Status.DMAFinished = true;
break;
}
}
@@ -1300,7 +1263,6 @@ static __forceinline int IPU1chain() {
if (ipu1dma->qwc > 0 && IPU1Status.InProgress == true)
{
-
int qwc = ipu1dma->qwc;
u32 *pMem;
@@ -1308,7 +1270,8 @@ static __forceinline int IPU1chain() {
if (pMem == NULL)
{
- Console.Error("ipu1dma NULL!"); return totalqwc;
+ Console.Error("ipu1dma NULL!");
+ return totalqwc;
}
//Write our data to the fifo
@@ -1484,7 +1447,6 @@ int IPU1dma()
}
else
{
- IPU_LOG("Here");
cpuRegs.eCycle[4] = 0x9999;//IPU_INT_TO(2048);
}
@@ -1601,7 +1563,6 @@ __forceinline void dmaIPU1() // toIPU
IPU1Status.DMAMode = DMA_MODE_CHAIN;
IPU1dma();
- //if (ipuRegs->ctrl.BUSY) IPUWorker();
}
else //Normal Mode
{
@@ -1623,7 +1584,6 @@ __forceinline void dmaIPU1() // toIPU
IPU1Status.DMAFinished = true;
IPU1Status.DMAMode = DMA_MODE_NORMAL;
IPU1dma();
- //if (ipuRegs->ctrl.BUSY) IPUWorker();
}
}
}
diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h
index ab831329aa..bb23f05e33 100644
--- a/pcsx2/IPU/IPU.h
+++ b/pcsx2/IPU/IPU.h
@@ -17,7 +17,6 @@
#define __IPU_H__
#include "mpeg2lib/Mpeg.h"
-#include "coroutine.h"
#include "IPU_Fifo.h"
#ifdef _MSC_VER
@@ -327,7 +326,7 @@ struct IPUregisters {
struct tIPU_cmd
{
int index;
- int pos[2];
+ int pos[6];
int current;
void clear()
{
@@ -342,12 +341,13 @@ struct tIPU_cmd
}
};
-//extern tIPU_cmd ipu_cmd;
+extern tIPU_cmd ipu_cmd;
extern tIPU_BP g_BP;
extern int coded_block_pattern;
extern int g_nIPU0Data; // or 0x80000000 whenever transferring
extern u8* g_pIPU0Pointer;
-
+extern IPUStatus IPU1Status;
+extern tIPU_DMA g_nDMATransfer;
// The IPU can only do one task at once and never uses other buffers so these
// should be made available to functions in other modules to save registers.
extern __aligned16 macroblock_rgb32 rgb32;
@@ -376,10 +376,11 @@ extern int IPU0dma();
extern int IPU1dma();
extern u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size);
+extern u8 __fastcall getBits128(u8 *address, u32 advance);
+extern u8 __fastcall getBits64(u8 *address, u32 advance);
extern u8 __fastcall getBits32(u8 *address, u32 advance);
extern u8 __fastcall getBits16(u8 *address, u32 advance);
extern u8 __fastcall getBits8(u8 *address, u32 advance);
-extern int __fastcall getBits(u8 *address, u32 size, u32 advance);
#endif
diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp
index 66b1af502c..2435903aa3 100644
--- a/pcsx2/IPU/IPU_Fifo.cpp
+++ b/pcsx2/IPU/IPU_Fifo.cpp
@@ -13,7 +13,6 @@
* If not, see .
*/
-
#include "PrecompiledHeader.h"
#include "Common.h"
#include "IPU_Fifo.h"
@@ -106,20 +105,18 @@ int IPU_Fifo_Output::write(const u32 *value, int size)
ipuRegs->ctrl.OFC += firsttrans;
IPU0dma();
- //Console.WriteLn("Written %d qwords, %d", firsttrans,ipuRegs->ctrl.OFC);
return firsttrans;
}
int IPU_Fifo_Input::read(void *value)
{
- // wait until enough data
- if (g_BP.IFC < 8)
+ // wait until enough data to ensure proper streaming.
+ if (g_BP.IFC < 4)
{
// IPU FIFO is empty and DMA is waiting so lets tell the DMA we are ready to put data in the FIFO
if(cpuRegs.eCycle[4] == 0x9999)
{
- //DevCon.Warning("Setting ECycle");
CPU_INT( DMAC_TO_IPU, 4 );
}
diff --git a/pcsx2/IPU/acoroutine.S b/pcsx2/IPU/acoroutine.S
deleted file mode 100644
index 2c28a2c248..0000000000
--- a/pcsx2/IPU/acoroutine.S
+++ /dev/null
@@ -1,78 +0,0 @@
-.intel_syntax noprefix
-
-.extern g_pCurrentRoutine
-
-.globl so_call
-so_call:
- mov eax, dword ptr [esp+4]
- test dword ptr [eax+24], 1
- jnz RestoreRegs
- mov [eax+8], ebx
- mov [eax+12], esi
- mov [eax+16], edi
- mov [eax+20], ebp
- mov dword ptr [eax+24], 1
- jmp CallFn
-RestoreRegs:
- // have to load and save at the same time
- mov ecx, [eax+8]
- mov edx, [eax+12]
- mov [eax+8], ebx
- mov [eax+12], esi
- mov ebx, ecx
- mov esi, edx
- mov ecx, [eax+16]
- mov edx, [eax+20]
- mov [eax+16], edi
- mov [eax+20], ebp
- mov edi, ecx
- mov ebp, edx
-
-CallFn:
- mov [g_pCurrentRoutine], eax
- mov ecx, esp
- mov esp, [eax+4]
- mov [eax+4], ecx
-
- jmp dword ptr [eax]
-
-.globl so_resume
-so_resume:
- mov eax, [g_pCurrentRoutine]
- mov ecx, [eax+8]
- mov edx, [eax+12]
- mov [eax+8], ebx
- mov [eax+12], esi
- mov ebx, ecx
- mov esi, edx
- mov ecx, [eax+16]
- mov edx, [eax+20]
- mov [eax+16], edi
- mov [eax+20], ebp
- mov edi, ecx
- mov ebp, edx
-
- // put the return address in pcalladdr
- mov ecx, [esp]
- mov [eax], ecx
- add esp, 4 // remove the return address
-
- // swap stack pointers
- mov ecx, [eax+4]
- mov [eax+4], esp
- mov esp, ecx
- ret
-
-.globl so_exit
-so_exit:
- mov eax, [g_pCurrentRoutine]
- mov esp, [eax+4]
- mov ebx, [eax+8]
- mov esi, [eax+12]
- mov edi, [eax+16]
- mov ebp, [eax+20]
- ret
-
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
diff --git a/pcsx2/IPU/acoroutine.asm b/pcsx2/IPU/acoroutine.asm
deleted file mode 100644
index d81a5f12d2..0000000000
--- a/pcsx2/IPU/acoroutine.asm
+++ /dev/null
@@ -1,140 +0,0 @@
-; Pcsx2 - Pc Ps2 Emulator
-; Copyright (C) 2002-2008 Pcsx2 Team
-;
-; This program is free software; you can redistribute it and/or modify
-; it under the terms of the GNU General Public License as published by
-; the Free Software Foundation; either version 2 of the License, or
-; (at your option) any later version.
-
-; This program is distributed in the hope that it will be useful,
-; but WITHOUT ANY WARRANTY; without even the implied warranty of
-; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-; GNU General Public License for more details.
-;
-; You should have received a copy of the GNU General Public License
-; along with this program; if not, write to the Free Software
-; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-
-;; x86-64 coroutine fucntions
-extern g_pCurrentRoutine:ptr
-
-.code
-
-so_call proc public
- test dword ptr [rcx+88], 1
- jnz so_call_RestoreRegs
- mov [rcx+24], rbp
- mov [rcx+16], rbx
- mov [rcx+32], r12
- mov [rcx+40], r13
- mov [rcx+48], r14
- mov [rcx+56], r15
- mov [rcx+64], rsi
- mov [rcx+72], rdi
- mov dword ptr [rcx+88], 1
- jmp so_call_CallFn
-so_call_RestoreRegs:
- ;; have to load and save at the same time
- ;; rbp, rbx, r12
- mov rax, [rcx+24]
- mov r8, [rcx+16]
- mov rdx, [rcx+32]
- mov [rcx+24], rbp
- mov [rcx+16], rbx
- mov [rcx+32], r12
- mov rbp, rax
- mov rbx, r8
- mov r12, rdx
- ;; r13, r14, r15
- mov rax, [rcx+40]
- mov r8, [rcx+48]
- mov rdx, [rcx+56]
- mov [rcx+40], r13
- mov [rcx+48], r14
- mov [rcx+56], r15
- mov r13, rax
- mov r14, r8
- mov r15, rdx
-
- ;; rsi, rdi
- mov rax, [rcx+64]
- mov rdx, [rcx+72]
- mov [rcx+64], rsi
- mov [rcx+72], rdi
- mov rsi, rax
- mov rdi, rdx
-
-so_call_CallFn:
- mov [g_pCurrentRoutine], rcx
-
- ;; swap the stack
- mov rax, [rcx+8]
- mov [rcx+8], rsp
- mov rsp, rax
- mov rax, [rcx+0]
- mov rcx, [rcx+80]
-
- jmp rax
-
-so_call endp
-
-; so_resume
-so_resume proc public
- ;; rbp, rbx, r12
- mov rcx, [g_pCurrentRoutine]
- mov rax, [rcx+24]
- mov r8, [rcx+16]
- mov rdx, [rcx+32]
- mov [rcx+24], rbp
- mov [rcx+16], rbx
- mov [rcx+32], r12
- mov rbp, rax
- mov rbx, r8
- mov r12, rdx
- ;; r13, r14, r15
- mov rax, [rcx+40]
- mov r8, [rcx+48]
- mov rdx, [rcx+56]
- mov [rcx+40], r13
- mov [rcx+48], r14
- mov [rcx+56], r15
- mov r13, rax
- mov r14, r8
- mov r15, rdx
- ;; rsi, rdi
- mov rax, [rcx+64]
- mov rdx, [rcx+72]
- mov [rcx+64], rsi
- mov [rcx+72], rdi
- mov rsi, rax
- mov rdi, rdx
-
- ;; put the return address in pcalladdr
- mov rax, [rsp]
- mov [rcx], rax
- add rsp, 8 ;; remove the return address
-
- ;; swap stack pointers
- mov rax, [rcx+8]
- mov [rcx+8], rsp
- mov rsp, rax
-
- ret
-
-so_resume endp
-
-so_exit proc public
- mov rcx, [g_pCurrentRoutine]
- mov rsp, [rcx+8]
- mov rbp, [rcx+24]
- mov rbx, [rcx+16]
- mov r12, [rcx+32]
- mov r13, [rcx+40]
- mov r14, [rcx+48]
- mov r15, [rcx+56]
- mov rsi, [rcx+64]
- mov rdi, [rcx+72]
- ret
-so_exit endp
-
-end
\ No newline at end of file
diff --git a/pcsx2/IPU/coroutine.cpp b/pcsx2/IPU/coroutine.cpp
deleted file mode 100644
index b2992091b1..0000000000
--- a/pcsx2/IPU/coroutine.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2010 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-
-#include "PrecompiledHeader.h"
-
-#include "coroutine.h"
-
-struct coroutine {
- void* pcalladdr;
- void *pcurstack;
-
- uptr storeebx, storeesi, storeedi, storeebp;
-
- s32 restore; // if nonzero, restore the registers
- s32 alloc;
- //struct s_coroutine *caller;
- //struct s_coroutine *restarget;
-
-};
-
-#define CO_STK_ALIGN 256
-#define CO_STK_COROSIZE ((sizeof(coroutine) + CO_STK_ALIGN - 1) & ~(CO_STK_ALIGN - 1))
-#define CO_MIN_SIZE (4 * 1024)
-
-coroutine* g_pCurrentRoutine;
-
-coroutine_t so_create(void (*func)(void *), void *data, void *stack, int size)
-{
- void* endstack;
- int alloc = 0; // r = CO_STK_COROSIZE;
- coroutine *co;
-
- if ((size &= ~(sizeof(s32) - 1)) < CO_MIN_SIZE) return NULL;
- if (!stack) {
- size = (size + sizeof(coroutine) + CO_STK_ALIGN - 1) & ~(CO_STK_ALIGN - 1);
- stack = malloc(size);
- if (!stack) return NULL;
- alloc = size;
- }
- endstack = (char*)stack + size - 64;
- co = (coroutine*)stack;
- stack = (char *) stack + CO_STK_COROSIZE;
- *(void**)endstack = NULL;
- *(void**)((char*)endstack+sizeof(void*)) = data;
- co->alloc = alloc;
- co->pcalladdr = (void*)func;
- co->pcurstack = endstack;
- return co;
-}
-
-void so_delete(coroutine_t coro)
-{
- coroutine *co = (coroutine *) coro;
- pxAssert( co != NULL );
- if (co->alloc) free(co);
-}
-
-// see acoroutines.S and acoroutines.asm for other asm implementations
-#if defined(_MSC_VER)
-
-__declspec(naked) void so_call(coroutine_t coro)
-{
- __asm {
- mov eax, dword ptr [esp+4]
- test dword ptr [eax+24], 1
- jnz RestoreRegs
- mov [eax+8], ebx
- mov [eax+12], esi
- mov [eax+16], edi
- mov [eax+20], ebp
- mov dword ptr [eax+24], 1
- jmp CallFn
-RestoreRegs:
- // have to load and save at the same time
- mov ecx, [eax+8]
- mov edx, [eax+12]
- mov [eax+8], ebx
- mov [eax+12], esi
- mov ebx, ecx
- mov esi, edx
- mov ecx, [eax+16]
- mov edx, [eax+20]
- mov [eax+16], edi
- mov [eax+20], ebp
- mov edi, ecx
- mov ebp, edx
-
-CallFn:
- mov [g_pCurrentRoutine], eax
- mov ecx, esp
- mov esp, [eax+4]
- mov [eax+4], ecx
-
- jmp dword ptr [eax]
- }
-}
-
-__declspec(naked) void so_resume(void)
-{
- __asm {
- mov eax, [g_pCurrentRoutine]
- mov ecx, [eax+8]
- mov edx, [eax+12]
- mov [eax+8], ebx
- mov [eax+12], esi
- mov ebx, ecx
- mov esi, edx
- mov ecx, [eax+16]
- mov edx, [eax+20]
- mov [eax+16], edi
- mov [eax+20], ebp
- mov edi, ecx
- mov ebp, edx
-
- // put the return address in pcalladdr
- mov ecx, [esp]
- mov [eax], ecx
- add esp, 4 // remove the return address
-
- // swap stack pointers
- mov ecx, [eax+4]
- mov [eax+4], esp
- mov esp, ecx
- ret
- }
-}
-
-__declspec(naked) void so_exit(void)
-{
- __asm {
- mov eax, [g_pCurrentRoutine]
- mov esp, [eax+4]
- mov ebx, [eax+8]
- mov esi, [eax+12]
- mov edi, [eax+16]
- mov ebp, [eax+20]
- ret
- }
-}
-#endif
diff --git a/pcsx2/IPU/coroutine.h b/pcsx2/IPU/coroutine.h
deleted file mode 100644
index 7d40348450..0000000000
--- a/pcsx2/IPU/coroutine.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2010 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef PCSX2_COROUTINE_LIB
-#define PCSX2_COROUTINE_LIB
-
-// low level coroutine library
-typedef void *coroutine_t;
-
-coroutine_t so_create(void (*func)(void *), void *data, void *stack, int size);
-void so_delete(coroutine_t coro);
-
-#include "NakedAsm.h"
-
-#endif
diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp b/pcsx2/IPU/mpeg2lib/Mpeg.cpp
index c7c8afca15..7b7a278fa9 100644
--- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp
+++ b/pcsx2/IPU/mpeg2lib/Mpeg.cpp
@@ -48,55 +48,51 @@ int non_linear_quantizer_scale [] =
back to the 1st slot when 128bits have been read.
*/
extern void ReorderBitstream();
+const DCTtab * tab;
+int mbaCount = 0;
-int get_macroblock_modes(decoder_t * const decoder)
+int get_macroblock_modes()
{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
int macroblock_modes;
const MBtab * tab;
- switch (decoder->coding_type)
+ switch (decoder.coding_type)
{
-
case I_TYPE:
- macroblock_modes = UBITS(bit_buf, 2);
+ macroblock_modes = UBITS(2);
if (macroblock_modes == 0) return 0; // error
tab = MB_I + (macroblock_modes >> 1);
- DUMPBITS(bit_buf, bits, tab->len);
+ DUMPBITS(tab->len);
macroblock_modes = tab->modes;
- if ((!(decoder->frame_pred_frame_dct)) &&
- (decoder->picture_structure == FRAME_PICTURE))
+ if ((!(decoder.frame_pred_frame_dct)) &&
+ (decoder.picture_structure == FRAME_PICTURE))
{
- macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED;
- DUMPBITS(bit_buf, bits, 1);
+ macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
}
return macroblock_modes;
case P_TYPE:
- macroblock_modes = UBITS(bit_buf, 6);
+ macroblock_modes = UBITS(6);
if (macroblock_modes == 0) return 0; // error
tab = MB_P + (macroblock_modes >> 1);
- DUMPBITS(bit_buf, bits, tab->len);
+ DUMPBITS(tab->len);
macroblock_modes = tab->modes;
- if (decoder->picture_structure != FRAME_PICTURE)
+ if (decoder.picture_structure != FRAME_PICTURE)
{
if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
{
- macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
- DUMPBITS(bit_buf, bits, 2);
+ macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
}
return macroblock_modes;
}
- else if (decoder->frame_pred_frame_dct)
+ else if (decoder.frame_pred_frame_dct)
{
if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
macroblock_modes |= MC_FRAME;
@@ -107,39 +103,36 @@ int get_macroblock_modes(decoder_t * const decoder)
{
if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
{
- macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
- DUMPBITS(bit_buf, bits, 2);
+ macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
}
if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
{
- macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED;
- DUMPBITS(bit_buf, bits, 1);
+ macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
}
return macroblock_modes;
}
case B_TYPE:
- macroblock_modes = UBITS(bit_buf, 6);
+ macroblock_modes = UBITS(6);
if (macroblock_modes == 0) return 0; // error
tab = MB_B + macroblock_modes;
- DUMPBITS(bit_buf, bits, tab->len);
+ DUMPBITS(tab->len);
macroblock_modes = tab->modes;
- if (decoder->picture_structure != FRAME_PICTURE)
+ if (decoder.picture_structure != FRAME_PICTURE)
{
if (!(macroblock_modes & MACROBLOCK_INTRA))
{
- macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
- DUMPBITS(bit_buf, bits, 2);
+ macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
}
return macroblock_modes;
}
- else if (decoder->frame_pred_frame_dct)
+ else if (decoder.frame_pred_frame_dct)
{
/* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
macroblock_modes |= MC_FRAME;
@@ -149,968 +142,544 @@ int get_macroblock_modes(decoder_t * const decoder)
{
if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
- macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
- DUMPBITS(bit_buf, bits, 2);
+ macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
{
intra:
- macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED;
- DUMPBITS(bit_buf, bits, 1);
+ macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
}
return macroblock_modes;
}
case D_TYPE:
- macroblock_modes = UBITS(bit_buf, 1);
+ macroblock_modes = GETBITS(1);
if (macroblock_modes == 0) return 0; // error
-
- DUMPBITS(bit_buf, bits, 1);
return MACROBLOCK_INTRA;
default:
return 0;
}
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
}
-static __forceinline int get_quantizer_scale(decoder_t * const decoder)
+static __forceinline int get_quantizer_scale()
{
int quantizer_scale_code;
- quantizer_scale_code = UBITS(decoder->bitstream_buf, 5);
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 5);
+ quantizer_scale_code = GETBITS(5);
- if (decoder->q_scale_type)
+ if (decoder.q_scale_type)
return non_linear_quantizer_scale [quantizer_scale_code];
else
return quantizer_scale_code << 1;
}
-static __forceinline int get_coded_block_pattern(decoder_t * const decoder)
+static __forceinline int get_coded_block_pattern()
{
const CBPtab * tab;
+ u16 code = UBITS(16);
- NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
-
- if (decoder->bitstream_buf >= 0x20000000)
- tab = CBP_7 + (UBITS(decoder->bitstream_buf, 7) - 16);
+ if (code >= 0x2000)
+ tab = CBP_7 + (UBITS(7) - 16);
else
- tab = CBP_9 + UBITS(decoder->bitstream_buf, 9);
+ tab = CBP_9 + UBITS(9);
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, tab->len);
+ DUMPBITS(tab->len);
return tab->cbp;
}
-static __forceinline int get_luma_dc_dct_diff(decoder_t * const decoder)
+int __forceinline get_motion_delta(const int f_code)
{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
+#define bit_buf (decoder.bitstream_buf)
+#define bits (decoder.bitstream_bits)
+#define bit_ptr (decoder.bitstream_ptr)
- const DCtab * tab;
- int size;
- int dc_diff;
+ int delta;
+ int sign;
+ const MVtab * tab;
+ u16 code = UBITS(16);
- if (bit_buf < 0xf8000000)
+ if ((code & 0x8000))
{
- tab = DC_lum_5 + UBITS(bit_buf, 5);
- size = tab->size;
-
- if (size)
- {
- DUMPBITS(bit_buf, bits, tab->len);
- bits += size;
- dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
- bit_buf <<= size;
- return dc_diff;
- }
- else
- {
- DUMPBITS(bit_buf, bits, 3);
- return 0;
- }
+ DUMPBITS(1);
+ return 0x00010000;
+ }
+ else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
+ {
+ tab = MV_4 + UBITS(4);
+ }
+ else
+ {
+ tab = MV_10 + UBITS(10);
}
- tab = DC_long + (UBITS(bit_buf, 9) - 0x1e0); //0x1e0);
+ delta = tab->delta + 1;
+ DUMPBITS(tab->len);
+
+ sign = SBITS(1);
+ DUMPBITS(1);
+ return (delta ^ sign) - sign;
- size = tab->size;
- DUMPBITS(bit_buf, bits, tab->len);
- NEEDBITS(bit_buf, bits, bit_ptr);
- dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
- DUMPBITS(bit_buf, bits, size);
- return dc_diff;
#undef bit_buf
#undef bits
#undef bit_ptr
}
-static __forceinline int get_chroma_dc_dct_diff(decoder_t * const decoder)
+int __forceinline get_dmv()
{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
+ const DMVtab * tab;
- const DCtab * tab;
+ tab = DMV_2 + UBITS(2);
+ DUMPBITS(tab->len);
+ return tab->dmv;
+}
+
+int get_macroblock_address_increment()
+{
+ const MBAtab *mba;
+
+ u16 code = UBITS(16);
+
+ if (code >= 4096)
+ mba = MBA_5 + (UBITS(5) - 2);
+ else if (code >= 768)
+ mba = MBA_11 + (UBITS(11) - 24);
+ else switch (UBITS(11))
+ {
+
+ case 8: /* macroblock_escape */
+ DUMPBITS(11);
+ return 0x23;
+
+ case 15: /* macroblock_stuffing (MPEG1 only) */
+ if (decoder.mpeg1)
+ {
+ DUMPBITS(11);
+ return 0x22;
+ }
+
+ default:
+ return 0;//error
+ }
+
+ DUMPBITS(mba->len);
+
+ return mba->mba + 1;
+}
+
+static __forceinline int get_luma_dc_dct_diff()
+{
int size;
int dc_diff;
+ u16 code = UBITS(5);
- if (bit_buf < 0xf8000000)
+ if (code < 31)
{
- tab = DC_chrom_5 + UBITS(bit_buf, 5);
- size = tab->size;
+ size = DClumtab0[code].size;
+ DUMPBITS(DClumtab0[code].len);
- if (size)
- {
- DUMPBITS(bit_buf, bits, tab->len);
- bits += size;
- dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
- bit_buf <<= size;
- return dc_diff;
- }
- else
- {
- DUMPBITS(bit_buf, bits, 2);
- return 0;
- }
+ // 5 bits max
+ }
+ else
+ {
+ code = UBITS(9) - 0x1f0;
+ size = DClumtab1[code].size;
+ DUMPBITS(DClumtab1[code].len);
+
+ // 9 bits max
+ }
+
+ if (size==0)
+ dc_diff = 0;
+ else
+ {
+ dc_diff = GETBITS(size);
+
+ // 6 for tab0 and 11 for tab1
+ if ((dc_diff & (1<<(size-1)))==0)
+ dc_diff-= (1<size;
- DUMPBITS(bit_buf, bits, tab->len + 1);
- NEEDBITS(bit_buf, bits, bit_ptr);
- dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
- DUMPBITS(bit_buf, bits, size);
return dc_diff;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
+}
+
+static __forceinline int get_chroma_dc_dct_diff()
+{
+ int size;
+ int dc_diff;
+ u16 code = UBITS(5);
+
+ if (code<31)
+ {
+ size = DCchromtab0[code].size;
+ DUMPBITS(DCchromtab0[code].len);
+ }
+ else
+ {
+ code = UBITS(10) - 0x3e0;
+ size = DCchromtab1[code].size;
+ DUMPBITS(DCchromtab1[code].len);
+ }
+
+ if (size==0)
+ dc_diff = 0;
+ else
+ {
+ dc_diff = GETBITS(size);
+
+ if ((dc_diff & (1<<(size-1)))==0)
+ {
+ dc_diff-= (1< 4095)) \
- val = SBITS (val, 1) ^ 2047; \
+ val = (((s32)val) >> 31) ^ 2047; \
} while (0)
-static __forceinline void get_intra_block_B14(decoder_t * const decoder)
+static __forceinline bool get_intra_block()
{
int i;
int j;
int val;
- const u8 * scan = decoder->scan;
- const u8 * quant_matrix = decoder->intra_quantizer_matrix;
- int quantizer_scale = decoder->quantizer_scale;
- int mismatch;
- const DCTtab * tab;
- u32 bit_buf;
- u8 * bit_ptr;
- int bits;
- s16 * dest;
+ const u8 * scan = decoder.scan;
+ const u8 * quant_matrix = decoder.intra_quantizer_matrix;
+ int quantizer_scale = decoder.quantizer_scale;
+ s16 * dest = decoder.DCTblock;
+ u16 code;
- dest = decoder->DCTblock;
- i = 0;
- mismatch = ~dest[0];
-
- bit_buf = decoder->bitstream_buf;
- bits = decoder->bitstream_bits;
- bit_ptr = decoder->bitstream_ptr;
-
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- while (1)
- {
- if (bit_buf >= 0x28000000)
+ /* decode AC coefficients */
+ for (i=1 + ipu_cmd.pos[4]; ; i++)
+ {
+ switch (ipu_cmd.pos[5])
+ {
+ case 0:
+ if (!GETWORD())
{
- tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
- i += tab->run;
- if (i >= 64) break; /* end of block */
-
-normal_code:
- j = scan[i];
- bit_buf <<= tab->len;
- bits += tab->len + 1;
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
-
- /* if (bitstream_get (1)) val = -val; */
- val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
- SATURATE(val);
- dest[j] = val;
- mismatch ^= val;
- bit_buf <<= 1;
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
+ ipu_cmd.pos[4] = i - 1;
+ return false;
}
- else if (bit_buf >= 0x04000000)
+
+ code = UBITS(16);
+
+ if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
{
- tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
- i += tab->run;
-
- if (i < 64) goto normal_code;
-
- /* escape code */
-
- i += UBITS(bit_buf << 6, 6) - 64;
-
- if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
- j = scan[i];
-
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (SBITS(bit_buf, 12) * quantizer_scale * quant_matrix[i]) / 16;
-
- SATURATE(val);
- dest[j] = val;
- mismatch ^= val;
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
-
+ tab = &DCTtabnext[(code >> 12) - 4];
}
- else if (bit_buf >= 0x02000000)
+ else if (code >= 1024)
{
- tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
- i += tab->run;
-
- if (i < 64) goto normal_code;
+ if (decoder.intra_vlc_format && !decoder.mpeg1)
+ {
+ tab = &DCTtab0a[(code >> 8) - 4];
+ }
+ else
+ {
+ tab = &DCTtab0[(code >> 8) - 4];
+ }
}
- else if (bit_buf >= 0x00800000)
+ else if (code >= 512)
{
- tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
+ if (decoder.intra_vlc_format && !decoder.mpeg1)
+ {
+ tab = &DCTtab1a[(code >> 6) - 8];
+ }
+ else
+ {
+ tab = &DCTtab1[(code >> 6) - 8];
+ }
}
- else if (bit_buf >= 0x00200000)
+ else if (code >= 256)
{
- tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
+ tab = &DCTtab2[(code >> 4) - 16];
+ }
+ else if (code >= 128)
+ {
+ tab = &DCTtab3[(code >> 3) - 16];
+ }
+ else if (code >= 64)
+ {
+ tab = &DCTtab4[(code >> 2) - 16];
+ }
+ else if (code >= 32)
+ {
+ tab = &DCTtab5[(code >> 1) - 16];
+ }
+ else if (code >= 16)
+ {
+ tab = &DCTtab6[code - 16];
}
else
{
- tab = DCT_16 + UBITS(bit_buf, 16);
- bit_buf <<= 16;
- GETWORD(&bit_buf, bits + 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
+ ipu_cmd.pos[4] = 0;
+ return true;
}
- break; /* illegal, check needed to avoid buffer overflow */
- }
+ DUMPBITS(tab->len);
- dest[63] ^= mismatch & 1;
+ if (tab->run==64) /* end_of_block */
+ {
+ ipu_cmd.pos[4] = 0;
+ return true;
+ }
+
+ i+= tab->run == 65 ? GETBITS(6) : tab->run;
+ if (i >= 64)
+ {
+ ipu_cmd.pos[4] = 0;
+ return true;
+ }
+ case 1:
+ if (!GETWORD())
+ {
+ ipu_cmd.pos[4] = i - 1;
+ ipu_cmd.pos[5] = 1;
+ return false;
+ }
- if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
+ j = scan[i];
- DUMPBITS(bit_buf, bits, tab->len); /* dump end of block code */
+ if (tab->run==65) /* escape */
+ {
+ if(!decoder.mpeg1)
+ {
+ val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
+ DUMPBITS(12);
+ }
+ else
+ {
+ val = SBITS(8);
+ DUMPBITS(8);
- decoder->bitstream_buf = bit_buf;
- decoder->bitstream_bits = bits;
+ if (!(val & 0x7f))
+ {
+ val = GETBITS(8) + 2 * val;
+ }
+
+ val = (val * quantizer_scale * quant_matrix[i]) >> 4;
+ val = (val + ~ (((s32)val) >> 31)) | 1;
+ }
+ }
+ else
+ {
+ val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+ if(decoder.mpeg1)
+ {
+ /* oddification */
+ val = (val - 1) | 1;
+ }
+
+ /* if (bitstream_get (1)) val = -val; */
+ val = (val ^ SBITS(1)) - SBITS(1);
+ DUMPBITS(1);
+ }
+
+ SATURATE(val);
+ dest[j] = val;
+ ipu_cmd.pos[5] = 0;
+ }
+ }
+
+ ipu_cmd.pos[4] = 0;
+ return true;
}
-static __forceinline void get_intra_block_B15(decoder_t * const decoder)
+static __forceinline bool get_non_intra_block(int * last)
{
int i;
int j;
int val;
- const u8 * scan = decoder->scan;
- const u8 * quant_matrix = decoder->intra_quantizer_matrix;
- int quantizer_scale = decoder->quantizer_scale;
- int mismatch;
- const DCTtab * tab;
- u32 bit_buf;
- u8 * bit_ptr;
- int bits;
- s16 * dest;
+ const u8 * scan = decoder.scan;
+ const u8 * quant_matrix = decoder.non_intra_quantizer_matrix;
+ int quantizer_scale = decoder.quantizer_scale;
+ s16 * dest = decoder.DCTblock;
+ u16 code;
- dest = decoder->DCTblock;
- i = 0;
- mismatch = ~dest[0];
-
- bit_buf = decoder->bitstream_buf;
- bits = decoder->bitstream_bits;
- bit_ptr = decoder->bitstream_ptr;
-
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- while (1)
- {
- if (bit_buf >= 0x04000000)
+ /* decode AC coefficients */
+ for (i= ipu_cmd.pos[4] ; ; i++)
+ {
+ switch (ipu_cmd.pos[5])
{
- tab = DCT_B15_8 + (UBITS(bit_buf, 8) - 4);
- i += tab->run;
-
- if (i < 64)
+ case 0:
+ if (!GETWORD())
{
-normal_code:
- j = scan[i];
- bit_buf <<= tab->len;
- bits += tab->len + 1;
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+ ipu_cmd.pos[4] = i;
+ return false;
+ }
- /* if (bitstream_get (1)) val = -val; */
- val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
+ code = UBITS(16);
- SATURATE(val);
- dest[j] = val;
- mismatch ^= val;
-
- bit_buf <<= 1;
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- continue;
+ if (code >= 16384)
+ {
+ if (i==0)
+ {
+ tab = &DCTtabfirst[(code >> 12) - 4];
+ }
+ else
+ {
+ tab = &DCTtabnext[(code >> 12)- 4];
+ }
+ }
+ else if (code >= 1024)
+ {
+ tab = &DCTtab0[(code >> 8) - 4];
+ }
+ else if (code >= 512)
+ {
+ tab = &DCTtab1[(code >> 6) - 8];
+ }
+ else if (code >= 256)
+ {
+ tab = &DCTtab2[(code >> 4) - 16];
+ }
+ else if (code >= 128)
+ {
+ tab = &DCTtab3[(code >> 3) - 16];
+ }
+ else if (code >= 64)
+ {
+ tab = &DCTtab4[(code >> 2) - 16];
+ }
+ else if (code >= 32)
+ {
+ tab = &DCTtab5[(code >> 1) - 16];
+ }
+ else if (code >= 16)
+ {
+ tab = &DCTtab6[code - 16];
}
else
{
- /* end of block. I commented out this code because if we */
- /* dont exit here we will still exit at the later test :) */
- //if (i >= 128) break; /* end of block */
- /* escape code */
-
- i += UBITS(bit_buf << 6, 6) - 64;
-
- if (i >= 64) break; /* illegal, check against buffer overflow */
-
- j = scan[i];
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (SBITS(bit_buf, 12) * quantizer_scale * quant_matrix[i]) / 16;
-
- SATURATE(val);
- dest[j] = val;
- mismatch ^= val;
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
+ ipu_cmd.pos[4] = 0;
+ return true;
}
- }
- else if (bit_buf >= 0x02000000)
- {
- tab = DCT_B15_10 + (UBITS(bit_buf, 10) - 8);
- i += tab->run;
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00800000)
- {
- tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
- i += tab->run;
+ DUMPBITS(tab->len);
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00200000)
- {
- tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else
- {
- tab = DCT_16 + UBITS(bit_buf, 16);
- bit_buf <<= 16;
- GETWORD(&bit_buf, bits + 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
-
- break; /* illegal, check needed to avoid buffer overflow */
- }
-
- dest[63] ^= mismatch & 1;
-
- if ((bit_buf >> 28) != 0x6)
- ipuRegs->ctrl.ECD = 1;
-
- DUMPBITS(bit_buf, bits, tab->len); /* dump end of block code */
-
- decoder->bitstream_buf = bit_buf;
-
- decoder->bitstream_bits = bits;
-}
-
-static __forceinline int get_non_intra_block(decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
- int i;
- int j;
- int val;
- const u8 * scan = decoder->scan;
- const u8 * quant_matrix = decoder->non_intra_quantizer_matrix;
- int quantizer_scale = decoder->quantizer_scale;
- int mismatch;
- const DCTtab * tab;
- s16 * dest;
-
- i = -1;
- mismatch = -1;
- dest = decoder->DCTblock;
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- if (bit_buf >= 0x28000000)
- {
- tab = DCT_B14DC_5 + (UBITS(bit_buf, 5) - 5);
- goto entry_1;
- }
- else
- goto entry_2;
-
- while (1)
- {
- if (bit_buf >= 0x28000000)
- {
- tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
-entry_1:
- i += tab->run;
-
- if (i >= 64) break; /* end of block */
-normal_code:
- j = scan[i];
- bit_buf <<= tab->len;
- bits += tab->len + 1;
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
-
- /* if (bitstream_get (1)) val = -val; */
- val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
- SATURATE(val);
- dest[j] = val;
- mismatch ^= val;
- bit_buf <<= 1;
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
- }
-entry_2:
-
- if (bit_buf >= 0x04000000)
- {
- tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
- i += tab->run;
-
- if (i < 64) goto normal_code;
-
- /* escape code */
-
- i += UBITS(bit_buf << 6, 6) - 64;
-
- if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
- j = scan[i];
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
- val = 2 * (SBITS(bit_buf, 12) + SBITS(bit_buf, 1)) + 1;
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (val * quantizer_scale * quant_matrix[i]) / 32;
-
- SATURATE(val);
- dest[j] = val;
- mismatch ^= val;
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
- }
- else if (bit_buf >= 0x02000000)
- {
- tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00800000)
- {
- tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00200000)
- {
- tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else
- {
- tab = DCT_16 + UBITS(bit_buf, 16);
- bit_buf <<= 16;
- GETWORD(&bit_buf, bits + 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- break; /* illegal, check needed to avoid buffer overflow */
- }
-
- dest[63] ^= mismatch & 1;
-
- if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
- DUMPBITS(bit_buf, bits, tab->len); /* dump end of block code */
-
- decoder->bitstream_buf = bit_buf;
- decoder->bitstream_bits = bits;
- return i;
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-static __forceinline void get_mpeg1_intra_block(decoder_t * const decoder)
-{
- int i;
- int j;
- int val;
- const u8 * scan = decoder->scan;
- const u8 * quant_matrix = decoder->intra_quantizer_matrix;
- int quantizer_scale = decoder->quantizer_scale;
- const DCTtab * tab;
- u32 bit_buf;
- int bits;
- u8 * bit_ptr;
- s16 * dest;
-
- i = 0;
- dest = decoder->DCTblock;
- bit_buf = decoder->bitstream_buf;
- bits = decoder->bitstream_bits;
- bit_ptr = decoder->bitstream_ptr;
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- while (1)
- {
- if (bit_buf >= 0x28000000)
- {
- tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
- i += tab->run;
-
- if (i >= 64) break; /* end of block */
-
-normal_code:
- j = scan[i];
- bit_buf <<= tab->len;
- bits += tab->len + 1;
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
-
- /* oddification */
- val = (val - 1) | 1;
-
- /* if (bitstream_get (1)) val = -val; */
- val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
- SATURATE(val);
- dest[j] = val;
- bit_buf <<= 1;
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
-
- }
- else if (bit_buf >= 0x04000000)
- {
- tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
- i += tab->run;
-
- if (i < 64) goto normal_code;
-
- /* escape code */
-
- i += UBITS(bit_buf << 6, 6) - 64;
-
- if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
- j = scan[i];
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
- val = SBITS(bit_buf, 8);
-
- if (!(val & 0x7f))
+ if (tab->run==64) /* end_of_block */
{
- DUMPBITS(bit_buf, bits, 8);
- val = UBITS(bit_buf, 8) + 2 * val;
+ *last = i;
+ ipu_cmd.pos[4] = 0;
+ return true;
}
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (val * quantizer_scale * quant_matrix[i]) >> 4;
-
- /* oddification */
- val = (val + ~SBITS(val, 1)) | 1;
-
- SATURATE(val);
- dest[j] = val;
- DUMPBITS(bit_buf, bits, 8);
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
- }
- else if (bit_buf >= 0x02000000)
- {
- tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00800000)
- {
- tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00200000)
- {
- tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else
- {
- tab = DCT_16 + UBITS(bit_buf, 16);
- bit_buf <<= 16;
- GETWORD(&bit_buf, bits + 16);
- i += tab->run;
- goto normal_code;
- }
-
- break; /* illegal, check needed to avoid buffer overflow */
- }
-
- if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
- DUMPBITS(bit_buf, bits, 2); /* dump end of block code */
- decoder->bitstream_buf = bit_buf;
- decoder->bitstream_bits = bits;
-}
-
-static __forceinline int get_mpeg1_non_intra_block(decoder_t * const decoder)
-{
- int i;
- int j;
- int val;
- const u8 * scan = decoder->scan;
- const u8 * quant_matrix = decoder->non_intra_quantizer_matrix;
- int quantizer_scale = decoder->quantizer_scale;
- const DCTtab * tab;
- u32 bit_buf;
- int bits;
- u8 * bit_ptr;
- s16 * dest;
-
- i = -1;
- dest = decoder->DCTblock;
-
- bit_buf = decoder->bitstream_buf;
- bits = decoder->bitstream_bits;
- bit_ptr = decoder->bitstream_ptr;
-
- NEEDBITS(bit_buf, bits, bit_ptr);
-
- if (bit_buf >= 0x28000000)
- {
- tab = DCT_B14DC_5 + (UBITS(bit_buf, 5) - 5);
- goto entry_1;
- }
- else
- goto entry_2;
-
- while (1)
- {
- if (bit_buf >= 0x28000000)
- {
- tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
-entry_1:
- i += tab->run;
-
- if (i >= 64) break; /* end of block */
-
-normal_code:
- j = scan[i];
- bit_buf <<= tab->len;
- bits += tab->len + 1;
-
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
-
- /* oddification */
- val = (val - 1) | 1;
-
- /* if (bitstream_get (1)) val = -val; */
- val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
- SATURATE(val);
- dest[j] = val;
- bit_buf <<= 1;
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
- }
-entry_2:
- if (bit_buf >= 0x04000000)
- {
- tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
- i += tab->run;
-
- if (i < 64) goto normal_code;
-
- /* escape code */
-
- i += UBITS(bit_buf << 6, 6) - 64;
-
- if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
- j = scan[i];
- DUMPBITS(bit_buf, bits, 12);
- NEEDBITS(bit_buf, bits, bit_ptr);
- val = SBITS(bit_buf, 8);
-
- if (!(val & 0x7f))
+ i += (tab->run == 65) ? GETBITS(6) : tab->run;
+ if (i >= 64)
{
- DUMPBITS(bit_buf, bits, 8);
- val = UBITS(bit_buf, 8) + 2 * val;
+ *last = i;
+ ipu_cmd.pos[4] = 0;
+ return true;
}
- val = 2 * (val + SBITS(val, 1)) + 1;
+ case 1:
+ if (!GETWORD())
+ {
+ ipu_cmd.pos[4] = i;
+ ipu_cmd.pos[5] = 1;
+ return false;
+ }
- /* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
- val = (val * quantizer_scale * quant_matrix[i]) / 32;
+ j = scan[i];
- /* oddification */
- val = (val + ~SBITS(val, 1)) | 1;
+ if (tab->run==65) /* escape */
+ {
+ if (!decoder.mpeg1)
+ {
+ val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
+ DUMPBITS(12);
+ }
+ else
+ {
+ val = SBITS(8);
+ DUMPBITS(8);
+
+ if (!(val & 0x7f))
+ {
+ val = GETBITS(8) + 2 * val;
+ }
+
+ val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
+ val = (val + ~ (((s32)val) >> 31)) | 1;
+ }
+ }
+ else
+ {
+ val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
+ val = (val ^ SBITS(1)) - SBITS(1);
+ DUMPBITS(1);
+ }
SATURATE(val);
dest[j] = val;
- DUMPBITS(bit_buf, bits, 8);
- NEEDBITS(bit_buf, bits, bit_ptr);
- continue;
+ ipu_cmd.pos[5] = 0;
}
- else if (bit_buf >= 0x02000000)
- {
- tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00800000)
- {
- tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else if (bit_buf >= 0x00200000)
- {
- tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
- else
- {
- tab = DCT_16 + UBITS(bit_buf, 16);
- bit_buf <<= 16;
- GETWORD(&bit_buf, bits + 16);
- i += tab->run;
-
- if (i < 64) goto normal_code;
- }
-
- break; /* illegal, check needed to avoid buffer overflow */
}
- if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
- DUMPBITS(bit_buf, bits, 2); /* dump end of block code */
- decoder->bitstream_buf = bit_buf;
- decoder->bitstream_bits = bits;
- return i;
+ ipu_cmd.pos[4] = 0;
+ return true;
}
-static void __fastcall slice_intra_DCT(decoder_t * const decoder, const int cc,
- u8 * const dest, const int stride)
+static bool __fastcall slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
{
- NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
- /* Get the intra DC coefficient and inverse quantize it */
-
- if (cc == 0)
- decoder->dc_dct_pred[0] += get_luma_dc_dct_diff(decoder);
- else
- decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff(decoder);
-
- decoder->DCTblock[0] = decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision);
-
- if (decoder->mpeg1)
+ if (!skip || ipu_cmd.pos[3])
{
- get_mpeg1_intra_block(decoder);
- }
- else if (decoder->intra_vlc_format)
- {
- get_intra_block_B15(decoder);
- }
- else
- {
- get_intra_block_B14(decoder);
+ ipu_cmd.pos[3] = 0;
+ if (!GETWORD())
+ {
+ ipu_cmd.pos[3] = 1;
+ return false;
+ }
+
+ /* Get the intra DC coefficient and inverse quantize it */
+ if (cc == 0)
+ decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
+ else
+ decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
+
+ decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
}
- mpeg2_idct_copy(decoder->DCTblock, dest, stride);
+ if (!get_intra_block())
+ {
+ return false;
+ }
+
+ mpeg2_idct_copy(decoder.DCTblock, dest, stride);
+
+ return true;
}
-/* JayteeMaster: changed dest to 16 bit signed */
-static void __fastcall slice_non_intra_DCT(decoder_t * const decoder,
- /*u8*/s16 * const dest, const int stride)
+static bool __fastcall slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
{
int last;
- memzero(decoder->DCTblock);
- if (decoder->mpeg1)
- last = get_mpeg1_non_intra_block(decoder);
- else
- last = get_non_intra_block(decoder);
-
- mpeg2_idct_add(last, decoder->DCTblock, dest, stride);
-}
-
-#if defined(_MSC_VER)
-#pragma pack(1)
-#endif
-
-struct TGA_HEADER
-{
- u8 identsize; // size of ID field that follows 18 u8 header (0 usually)
- u8 colourmaptype; // type of colour map 0=none, 1=has palette
- u8 imagetype; // type of image 0=none,1=indexed,2=rgb,3=grey,+8=rle packed
-
- s16 colourmapstart; // first colour map entry in palette
- s16 colourmaplength; // number of colours in palette
- u8 colourmapbits; // number of bits per palette entry 15,16,24,32
-
- s16 xstart; // image x origin
- s16 ystart; // image y origin
- s16 width; // image width in pixels
- s16 height; // image height in pixels
- u8 bits; // image bits per pixel 8,16,24,32
- u8 descriptor; // image descriptor bits (vh flip bits)
-
- // pixel data follows header
-} __packed;
-
-#if defined(_MSC_VER)
-# pragma pack()
-#endif
-
-void SaveTGA(const char* filename, int width, int height, void* pdata)
-{
- TGA_HEADER hdr;
- FILE* f = fopen(filename, "wb");
-
- if (f == NULL) return;
-
- assert(sizeof(TGA_HEADER) == 18 && sizeof(hdr) == 18);
-
- memzero(hdr);
- hdr.imagetype = 2;
- hdr.bits = 32;
- hdr.width = width;
- hdr.height = height;
- hdr.descriptor |= 8 | (1 << 5); // 8bit alpha, flip vertical
- fwrite(&hdr, sizeof(hdr), 1, f);
- fwrite(pdata, width*height*4, 1, f);
- fclose(f);
-}
-
-static int s_index = 0; //, s_frame = 0;
-
-void SaveRGB32(u8* ptr)
-{
- char filename[255];
- sprintf(filename, "frames/frame%.4d.tga", s_index++);
- SaveTGA(filename, 16, 16, ptr);
-}
-
-void waitForSCD()
-{
- u8 bit8 = 1;
-
- while (!getBits8((u8*)&bit8, 0))
+ if (!skip)
{
- so_resume();
+ memzero(decoder.DCTblock);
}
- if (bit8 == 0)
+ if (!get_non_intra_block(&last))
{
- if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
-
- ipuRegs->ctrl.SCD = 1;
+ return false;
}
- while (!getBits32((u8*)&ipuRegs->top, 0))
- {
- so_resume();
- }
+ mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
- BigEndian(ipuRegs->top, ipuRegs->top);
-
- /*if(ipuRegs->ctrl.SCD)
- {
- switch(ipuRegs->top & 0xFFFFFFF0)
- {
- case 0x100:
- case 0x1A0:
- break;
- case 0x1B0:
- ipuRegs->ctrl.SCD = 0;
- if(ipuRegs->top == 0x1b4) ipuRegs->ctrl.ECD = 1;
- //else
- //{
- // do
- // {
- // while(!getBits32((u8*)&ipuRegs->top, 1))
- // {
- // so_resume();
- // }
-
- // BigEndian(ipuRegs->top, ipuRegs->top);
- // }
- // while((ipuRegs->top & 0xfffffff0) != 0x100);
- //}
- break;
- default:
- ipuRegs->ctrl.SCD = 0;
- break;
- }
- }*/
+ return true;
}
-void __forceinline finishmpeg2sliceIDEC(decoder_t* &decoder)
+void __forceinline finishmpeg2sliceIDEC()
{
ipuRegs->ctrl.SCD = 0;
- coded_block_pattern = decoder->coded_block_pattern;
+ coded_block_pattern = decoder.coded_block_pattern;
- g_BP.BP += decoder->bitstream_bits - 16;
+ g_BP.BP += decoder.bitstream_bits - 16;
if ((int)g_BP.BP < 0)
{
@@ -1122,103 +691,133 @@ void __forceinline finishmpeg2sliceIDEC(decoder_t* &decoder)
}
FillInternalBuffer(&g_BP.BP, 1, 0);
-
- waitForSCD();
}
-void mpeg2sliceIDEC(void* pdone)
+bool mpeg2sliceIDEC()
{
u32 read;
+ u16 code;
+ u8 bit8;
- bool resumed = false;
- decoder_t *decoder = &g_decoder;
-
- *(int*)pdone = 0;
- bitstream_init(decoder);
-
- decoder->dc_dct_pred[0] =
- decoder->dc_dct_pred[1] =
- decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
-
- decoder->mbc = 0;
- ipuRegs->ctrl.ECD = 0;
-
- if (UBITS(decoder->bitstream_buf, 2) == 0)
- {
- ipuRegs->ctrl.SCD = 0;
- }
- else
+ switch (ipu_cmd.pos[0])
{
+ case 0:
+ decoder.dc_dct_pred[0] =
+ decoder.dc_dct_pred[1] =
+ decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
+
+ decoder.mbc = 0;
+ ipuRegs->top = 0;
+ ipuRegs->ctrl.ECD = 0;
+
+ case 1:
+ ipu_cmd.pos[0] = 1;
+ if (!bitstream_init())
+ {
+ return false;
+ }
+
+ case 2:
+ ipu_cmd.pos[0] = 2;
while (1)
{
int DCT_offset, DCT_stride;
- int mba_inc;
const MBAtab * mba;
- NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
- decoder->macroblock_modes = get_macroblock_modes(decoder);
-
- /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
-
- if (decoder->macroblock_modes & MACROBLOCK_QUANT) //only IDEC
+ switch (ipu_cmd.pos[1])
{
- decoder->quantizer_scale = get_quantizer_scale(decoder);
- }
+ case 0:
+ decoder.macroblock_modes = get_macroblock_modes();
- if (decoder->macroblock_modes & DCT_TYPE_INTERLACED)
- {
- DCT_offset = decoder->stride;
- DCT_stride = decoder->stride * 2;
- }
- else
- {
- DCT_offset = decoder->stride * 8;
- DCT_stride = decoder->stride;
- }
-
- if (decoder->macroblock_modes & MACROBLOCK_INTRA)
- {
- decoder->coded_block_pattern = 0x3F;//all 6 blocks
- //ipuRegs->ctrl.CBP = 0x3f;
-
- memzero(*decoder->mb8);
- memzero(*decoder->rgb32);
-
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y, DCT_stride);
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride);
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset, DCT_stride);
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset + 8, DCT_stride);
- slice_intra_DCT(decoder, 1, (u8*)decoder->mb8->Cb, decoder->stride >> 1);
- slice_intra_DCT(decoder, 2, (u8*)decoder->mb8->Cr, decoder->stride >> 1);
-
- // Send The MacroBlock via DmaIpuFrom
-
- if (decoder->ofm == 0)
+ if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
{
- ipu_csc(decoder->mb8, decoder->rgb32, decoder->sgn);
+ decoder.quantizer_scale = get_quantizer_scale();
+ }
- g_nIPU0Data = 64;
- g_pIPU0Pointer = (u8*)decoder->rgb32;
- //if ( s_frame >= 39 ) SaveRGB32(g_pIPU0Pointer);
+ decoder.coded_block_pattern = 0x3F;//all 6 blocks
+ memzero(*decoder.mb8);
+ memzero(*decoder.rgb32);
+
+ case 1:
+ ipu_cmd.pos[1] = 1;
+
+ if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
+ {
+ DCT_offset = decoder.stride;
+ DCT_stride = decoder.stride * 2;
}
else
{
- ipu_csc(decoder->mb8, decoder->rgb32, decoder->sgn);
- ipu_dither(decoder->rgb32, decoder->rgb16, decoder->dte);
-
- g_nIPU0Data = 32;
- g_pIPU0Pointer = (u8*)decoder->rgb16;
- //if ( s_frame >= 39 ) SaveRGB32(g_pIPU0Pointer);
+ DCT_offset = decoder.stride * 8;
+ DCT_stride = decoder.stride;
}
+ switch (ipu_cmd.pos[2])
+ {
+ case 0:
+ case 1:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[2] == 1))
+ {
+ ipu_cmd.pos[2] = 1;
+ return false;
+ }
+ case 2:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
+ {
+ ipu_cmd.pos[2] = 2;
+ return false;
+ }
+ case 3:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
+ {
+ ipu_cmd.pos[2] = 3;
+ return false;
+ }
+ case 4:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
+ {
+ ipu_cmd.pos[2] = 4;
+ return false;
+ }
+ case 5:
+ if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[2] == 5))
+ {
+ ipu_cmd.pos[2] = 5;
+ return false;
+ }
+ case 6:
+ if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[2] == 6))
+ {
+ ipu_cmd.pos[2] = 6;
+ return false;
+ }
+ }
+
+ // Send The MacroBlock via DmaIpuFrom
+ ipu_csc(decoder.mb8, decoder.rgb32, decoder.sgn);
+
+ if (decoder.ofm == 0)
+ {
+ g_nIPU0Data = 64;
+ g_pIPU0Pointer = (u8*)decoder.rgb32;
+ }
+ else
+ {
+ ipu_dither(decoder.rgb32, decoder.rgb16, decoder.dte);
+
+ g_nIPU0Data = 32;
+ g_pIPU0Pointer = (u8*)decoder.rgb16;
+ }
+
+ case 2:
while (g_nIPU0Data > 0)
{
read = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data);
if (read == 0)
{
- so_resume();
- resumed = true;
+ ipu_cmd.pos[1] = 2;
+ return false;
}
else
{
@@ -1228,289 +827,322 @@ void mpeg2sliceIDEC(void* pdone)
}
}
- decoder->mbc++;
- }
-
- NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
- mba_inc = 0;
-
- while (1)
- {
- if (decoder->bitstream_buf >= 0x10000000)
+ decoder.mbc++;
+ mbaCount = 0;
+ case 3:
+ while (1)
{
- mba = MBA_5 + (UBITS(decoder->bitstream_buf, 5) - 2);
- break;
- }
- else if (decoder->bitstream_buf >= 0x03000000)
- {
- mba = MBA_11 + (UBITS(decoder->bitstream_buf, 11) - 24);
- break;
- }
- else switch (UBITS(decoder->bitstream_buf, 11))
+ if (!GETWORD())
{
-
- case 8: /* macroblock_escape */
- mba_inc += 33;
- /* pass through */
-
- case 15: /* macroblock_stuffing (MPEG1 only) */
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11);
- NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
- continue;
-
- default: /* end of slice/frame, or error? */
- {
-#ifdef MPEGHACK
- if (!resumed) so_resume();
-#endif
- finishmpeg2sliceIDEC(decoder);
-
- *(int*)pdone = 1;
- so_exit();
- }
+ ipu_cmd.pos[1] = 3;
+ return false;
}
- }
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, mba->len);
- mba_inc += mba->mba;
+ code = UBITS(16);
+ if (code >= 0x1000)
+ {
+ mba = MBA_5 + (UBITS(5) - 2);
+ break;
+ }
+ else if (code >= 0x0300)
+ {
+ mba = MBA_11 + (UBITS(11) - 24);
+ break;
+ }
+ else switch (UBITS(11))
+ {
+ case 8: /* macroblock_escape */
+ mbaCount += 33;
+ /* pass through */
- if (mba_inc)
- {
- decoder->dc_dct_pred[0] =
- decoder->dc_dct_pred[1] =
- decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
+ case 15: /* macroblock_stuffing (MPEG1 only) */
+ DUMPBITS(11);
+ continue;
- do
- {
- decoder->mbc++;
+ default: /* end of slice/frame, or error? */
+ {
+ goto finish_idec;
+ }
+ }
}
- while (--mba_inc);
+
+ DUMPBITS(mba->len);
+ mbaCount += mba->mba;
+
+ if (mbaCount)
+ {
+ decoder.dc_dct_pred[0] =
+ decoder.dc_dct_pred[1] =
+ decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
+
+ decoder.mbc += mbaCount;
+ }
+
+ case 4:
+ if (!GETWORD())
+ {
+ ipu_cmd.pos[1] = 4;
+ return false;
+ }
+
+ break;
}
+
+ ipu_cmd.pos[1] = 0;
+ ipu_cmd.pos[2] = 0;
}
+
+finish_idec:
+ finishmpeg2sliceIDEC();
+
+ case 3:
+ bit8 = 1;
+ if (!getBits8((u8*)&bit8, 0))
+ {
+ ipu_cmd.pos[0] = 3;
+ return false;
+ }
+
+ if (bit8 == 0)
+ {
+ if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
+
+ ipuRegs->ctrl.SCD = 1;
+ }
+
+ case 4:
+ if (!getBits32((u8*)&ipuRegs->top, 0))
+ {
+ ipu_cmd.pos[0] = 4;
+ return false;
+ }
+
+ BigEndian(ipuRegs->top, ipuRegs->top);
+ break;
}
-#ifdef MPEGHACK
- if (!resumed) so_resume();
-#endif
-
- finishmpeg2sliceIDEC(decoder);
-
- *(int*)pdone = 1;
- so_exit();
+ return true;
}
-void mpeg2_slice(void* pdone)
+bool mpeg2_slice() // resumable: returns false after saving the resume stage in ipu_cmd.pos[], true once every stage has completed
{
int DCT_offset, DCT_stride;
- //u8 bit8=0;
- //u32 fp = g_BP.FP;
- u32 bp;
- decoder_t * decoder = &g_decoder;
- u32 size = 0;
+ u8 bit8;
+ u32 size;
- *(int*)pdone = 0;
- ipuRegs->ctrl.ECD = 0;
-
- memzero(*decoder->mb8);
- memzero(*decoder->mb16);
-
- bitstream_init(decoder);
-
- if (decoder->dcr)
+ switch (ipu_cmd.pos[0]) // pos[0] selects the stage to (re)enter; the cases below fall through on purpose
{
- decoder->dc_dct_pred[0] =
- decoder->dc_dct_pred[1] =
- decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
- }
-
- if (decoder->macroblock_modes & DCT_TYPE_INTERLACED)
- {
- DCT_offset = decoder->stride;
- DCT_stride = decoder->stride * 2;
- }
- else
- {
- DCT_offset = decoder->stride * 8;
- DCT_stride = decoder->stride;
- }
-
- if (decoder->macroblock_modes & MACROBLOCK_INTRA)
- {
- decoder->coded_block_pattern = 0x3F;//all 6 blocks
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y, DCT_stride);
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride);
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset, DCT_stride);
- slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset + 8, DCT_stride);
- slice_intra_DCT(decoder, 1, (u8*)decoder->mb8->Cb, decoder->stride >> 1);
- slice_intra_DCT(decoder, 2, (u8*)decoder->mb8->Cr, decoder->stride >> 1);
- ipu_copy(decoder->mb8, decoder->mb16);
- }
- else
- {
- if (decoder->macroblock_modes & MACROBLOCK_PATTERN)
+ case 0: // stage 0 (fresh start): one-time resets that must not be repeated on resume
+ if (decoder.dcr)
{
- decoder->coded_block_pattern = get_coded_block_pattern(decoder);
- /* JayteeMaster: changed from mb8 to mb16 and from u8 to s16 */
-
- if (decoder->coded_block_pattern & 0x20) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y, DCT_stride);
- if (decoder->coded_block_pattern & 0x10) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + 8, DCT_stride);
- if (decoder->coded_block_pattern & 0x08) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + DCT_offset, DCT_stride);
- if (decoder->coded_block_pattern & 0x04) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + DCT_offset + 8, DCT_stride);
- if (decoder->coded_block_pattern & 0x2) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Cb, decoder->stride >> 1);
- if (decoder->coded_block_pattern & 0x1) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Cr, decoder->stride >> 1);
-
+ decoder.dc_dct_pred[0] =
+ decoder.dc_dct_pred[1] =
+ decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
}
- }
-
- //Send The MacroBlock via DmaIpuFrom
-
- size = 0; // Reset
- ipuRegs->ctrl.SCD = 0;
- coded_block_pattern = decoder->coded_block_pattern;
- bp = g_BP.BP;
- g_BP.BP += ((int)decoder->bitstream_bits - 16);
-
- // BP goes from 0 to 128, so negative values mean to read old buffer
- // so we minus from 128 to get the correct BP
- if ((int)g_BP.BP < 0)
- {
- g_BP.BP = 128 + (int)g_BP.BP;
-
- // After BP is positioned correctly, we need to reload the old buffer
- // so that reading may continue properly
- ReorderBitstream();
- }
-
- FillInternalBuffer(&g_BP.BP, 1, 0);
-
- decoder->mbc = 1;
- g_nIPU0Data = 48;
- g_pIPU0Pointer = (u8*)decoder->mb16;
-
- while (g_nIPU0Data > 0)
- {
- size = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data);
-
- if (size == 0)
+
+ ipuRegs->ctrl.ECD = 0;
+ ipuRegs->top = 0;
+ memzero(*decoder.mb8);
+ memzero(*decoder.mb16);
+ case 1: // stage 1: prime the bitstream working set; re-entered here until bitstream_init() succeeds
+ if (!bitstream_init())
{
- so_resume();
+ ipu_cmd.pos[0] = 1;
+ return false;
+ }
+
+ case 2: // stage 2: decode the blocks of the macroblock; pos[1] records the block in progress
+ ipu_cmd.pos[0] = 2;
+
+ if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) // DCT_offset/DCT_stride are locals, so they are recomputed on every (re)entry
+ {
+ DCT_offset = decoder.stride;
+ DCT_stride = decoder.stride * 2;
}
else
{
- g_pIPU0Pointer += size * 16;
- g_nIPU0Data -= size;
- }
- }
- waitForSCD();
-
- decoder->bitstream_bits = 0;
- *(int*)pdone = 1;
- so_exit();
-}
-
-int __forceinline get_motion_delta(decoder_t * const decoder,
- const int f_code)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
- int delta;
- int sign;
- const MVtab * tab;
-
- if ((bit_buf & 0x80000000))
- {
- DUMPBITS(bit_buf, bits, 1);
- return 0x00010000;
- }
- else if ((bit_buf & 0xf0000000) || ((bit_buf & 0xfc000000) == 0x0c000000))
- {
-
- tab = MV_4 + UBITS(bit_buf, 4);
- delta = (tab->delta << f_code) + 1;
- bits += tab->len + f_code + 1;
- bit_buf <<= tab->len;
-
- sign = SBITS(bit_buf, 1);
- bit_buf <<= 1;
-
- if (f_code) delta += UBITS(bit_buf, f_code);
-
- bit_buf <<= f_code;
-
- return (delta ^ sign) - sign;
-
- }
- else
- {
- tab = MV_10 + UBITS(bit_buf, 10);
- delta = (tab->delta << f_code) + 1;
- bits += tab->len + 1;
- bit_buf <<= tab->len;
-
- sign = SBITS(bit_buf, 1);
- bit_buf <<= 1;
-
- if (f_code)
- {
- NEEDBITS(bit_buf, bits, bit_ptr);
- delta += UBITS(bit_buf, f_code);
- DUMPBITS(bit_buf, bits, f_code);
+ DCT_offset = decoder.stride * 8;
+ DCT_stride = decoder.stride;
}
- return (delta ^ sign) - sign;
-
- }
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-int __forceinline get_dmv(decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
- const DMVtab * tab;
-
- tab = DMV_2 + UBITS(bit_buf, 2);
- DUMPBITS(bit_buf, bits, tab->len);
- return tab->dmv;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
-
-int get_macroblock_address_increment(decoder_t * const decoder)
-{
- const MBAtab *mba;
-
- if (decoder->bitstream_buf >= 0x10000000)
- mba = MBA_5 + (UBITS(decoder->bitstream_buf, 5) - 2);
- else if (decoder->bitstream_buf >= 0x03000000)
- mba = MBA_11 + (UBITS(decoder->bitstream_buf, 11) - 24);
- else switch (UBITS(decoder->bitstream_buf, 11))
+ if (decoder.macroblock_modes & MACROBLOCK_INTRA)
{
-
- case 8: /* macroblock_escape */
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11);
- return 0x23;
-
- case 15: /* macroblock_stuffing (MPEG1 only) */
- if (decoder->mpeg1)
+ switch(ipu_cmd.pos[1]) // intra path: blocks 1..6 in order, falling through; a failed block stores its own case number
+ {
+ case 0:
+ decoder.coded_block_pattern = 0x3F; // all six pattern bits set
+ case 1:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[1] == 1)) // last argument is true only when execution re-entered at this case
{
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11);
- return 0x22;
+ ipu_cmd.pos[1] = 1;
+ return false;
}
+ case 2:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
+ {
+ ipu_cmd.pos[1] = 2;
+ return false;
+ }
+ case 3:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
+ {
+ ipu_cmd.pos[1] = 3;
+ return false;
+ }
+ case 4:
+ if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
+ {
+ ipu_cmd.pos[1] = 4;
+ return false;
+ }
+ case 5:
+ if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
+ {
+ ipu_cmd.pos[1] = 5;
+ return false;
+ }
+ case 6:
+ if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
+ {
+ ipu_cmd.pos[1] = 6;
+ return false;
+ }
+ break;
+ }
- default:
- return 0;//error
+ ipu_copy(decoder.mb8, decoder.mb16); // mb16 is the buffer handed to the output FIFO below
+ }
+ else
+ {
+ if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
+ {
+ switch(ipu_cmd.pos[1]) // non-intra path: same resume scheme, but each block is decoded only if its pattern bit is set
+ {
+ case 0:
+ decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
+ case 1:
+ if (decoder.coded_block_pattern & 0x20)
+ {
+ if (!slice_non_intra_DCT((s16*)decoder.mb16->Y, DCT_stride, ipu_cmd.pos[1] == 1))
+ {
+ ipu_cmd.pos[1] = 1;
+ return false;
+ }
+ }
+ case 2:
+ if (decoder.coded_block_pattern & 0x10)
+ {
+ if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
+ {
+ ipu_cmd.pos[1] = 2;
+ return false;
+ }
+ }
+ case 3:
+ if (decoder.coded_block_pattern & 0x08)
+ {
+ if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
+ {
+ ipu_cmd.pos[1] = 3;
+ return false;
+ }
+ }
+ case 4:
+ if (decoder.coded_block_pattern & 0x04)
+ {
+ if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
+ {
+ ipu_cmd.pos[1] = 4;
+ return false;
+ }
+ }
+ case 5:
+ if (decoder.coded_block_pattern & 0x2)
+ {
+ if (!slice_non_intra_DCT((s16*)decoder.mb16->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
+ {
+ ipu_cmd.pos[1] = 5;
+ return false;
+ }
+ }
+ case 6:
+ if (decoder.coded_block_pattern & 0x1)
+ {
+ if (!slice_non_intra_DCT((s16*)decoder.mb16->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
+ {
+ ipu_cmd.pos[1] = 6;
+ return false;
+ }
+ }
+ break;
+ }
+ }
}
- DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, mba->len);
+ //Send The MacroBlock via DmaIpuFrom
+ size = 0; // Reset
+ ipuRegs->ctrl.SCD = 0;
+ coded_block_pattern = decoder.coded_block_pattern;
+ g_BP.BP += (int)decoder.bitstream_bits - 16;
- return mba->mba + 1;
-}
+ // BP goes from 0 to 128, so negative values mean to read old buffer
+ // so we minus from 128 to get the correct BP
+ if ((int)g_BP.BP < 0)
+ {
+ g_BP.BP = 128 + (int)g_BP.BP;
+
+ // After BP is positioned correctly, we need to reload the old buffer
+ // so that reading may continue properly
+ ReorderBitstream();
+ }
+
+ decoder.mbc = 1;
+ g_nIPU0Data = 48; // amount still to be pushed to the output FIFO; set once here, then only decremented in stage 3
+ g_pIPU0Pointer = (u8*)decoder.mb16;
+
+ case 3: // stage 3: drain mb16 into the output FIFO; stall (return false) whenever nothing was accepted
+ while (g_nIPU0Data > 0)
+ {
+ size = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data);
+
+ if (size == 0)
+ {
+ ipu_cmd.pos[0] = 3;
+ return false;
+ }
+ else
+ {
+ g_pIPU0Pointer += size * 16; // NOTE(review): 16 bytes per accepted unit if g_pIPU0Pointer is a u8* (as the cast above suggests) - confirm
+ g_nIPU0Data -= size;
+ }
+ }
+
+ case 4: // stage 4: fetch 8 bits; a zero byte byte-aligns BP and raises SCD
+ bit8 = 1;
+ if (!getBits8((u8*)&bit8, 0))
+ {
+ ipu_cmd.pos[0] = 4;
+ return false;
+ }
+
+ if (bit8 == 0)
+ {
+ if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7); // round BP up to the next multiple of 8
+
+ ipuRegs->ctrl.SCD = 1;
+ }
+
+ case 5: // stage 5: fetch 32 bits into the TOP register and byte-swap them
+ if (!getBits32((u8*)&ipuRegs->top, 0))
+ {
+ ipu_cmd.pos[0] = 5;
+ return false;
+ }
+
+ BigEndian(ipuRegs->top, ipuRegs->top);
+ decoder.bitstream_bits = 0;
+ break;
+ }
+
+ return true; // nothing in this function resets ipu_cmd.pos[]; presumably the caller does - confirm
+}
\ No newline at end of file
diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h
index 3c8cb79e11..2860e4f53b 100644
--- a/pcsx2/IPU/mpeg2lib/Mpeg.h
+++ b/pcsx2/IPU/mpeg2lib/Mpeg.h
@@ -99,7 +99,6 @@ struct decoder_t {
/* bit parsing stuff */
u32 bitstream_buf; /* current 32 bit working set */
int bitstream_bits; /* used bits in working set */
- u8 * bitstream_ptr; /* buffer with stream data; 128 bits buffer */
struct macroblock_8 *mb8;
struct macroblock_16 *mb16;
@@ -173,13 +172,13 @@ extern void (__fastcall *mpeg2_idct_add) (int last, s16 * block, s16* dest, int
#define IDEC 0
#define BDEC 1
-void mpeg2sliceIDEC(void* pdone);
-void mpeg2_slice(void* pdone);
-int get_macroblock_address_increment(decoder_t * const decoder);
-int get_macroblock_modes (decoder_t * const decoder);
+bool mpeg2sliceIDEC();
+bool mpeg2_slice();
+int get_macroblock_address_increment();
+int get_macroblock_modes();
-extern int get_motion_delta (decoder_t * const decoder, const int f_code);
-extern int get_dmv (decoder_t * const decoder);
+extern int get_motion_delta(const int f_code);
+extern int get_dmv();
extern int non_linear_quantizer_scale[];
extern decoder_t g_decoder;
@@ -189,7 +188,7 @@ void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb1
void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4);
void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16);
-int slice (decoder_t * const decoder, u8 * buffer);
+int slice (u8 * buffer);
/* idct.c */
void mpeg2_idct_init ();
@@ -199,4 +198,10 @@ void mpeg2_idct_init ();
#define BigEndian(out, in) out = __builtin_bswap32(in) // or we could use the asm function bswap...
#endif
+#ifdef _MSC_VER
+#define BigEndian64(out, in) out = _byteswap_uint64(in)
+#else
+#define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap...
+#endif
+
#endif//__MPEG_H__
diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h
index 4867b2175c..69727beac6 100644
--- a/pcsx2/IPU/mpeg2lib/Vlc.h
+++ b/pcsx2/IPU/mpeg2lib/Vlc.h
@@ -25,55 +25,70 @@
#ifndef __VLC_H__
#define __VLC_H__
-#include "IPU/coroutine.h"
-
static u8 data[2];
-static u8 dword[4];
+//static u8 word[4];
+//static u8 dword[8];
+//static u8 qword[16];
extern tIPU_BP g_BP;
-extern decoder_t g_decoder;
+extern decoder_t decoder;
extern void ReorderBitstream();
-static __forceinline void GETWORD(u32 * bit_buf,int bits)
+static __forceinline int GETWORD() // refills the decoder working set when needed; returns 1 on success (or nothing to do), 0 if 16 more bits could not be fetched
{
- while(!getBits16(data,1))
+ if (decoder.bitstream_bits > 0) // a refill is only attempted while the bit counter is positive
{
- so_resume();
+ if(!getBits16(data,1)) // fetch failed: report it to the caller rather than looping here
+ {
+ return 0;
+ }
+
+ /*u32 data;
+ BigEndian(data, *(u32*)word);
+ decoder.bitstream_buf |= (u64)data << decoder.bitstream_bits;
+ decoder.bitstream_bits -= 32;*/
+ decoder.bitstream_buf |= ((u32)(((u16)data[0] << 8) | data[1])) << decoder.bitstream_bits; // data[0] is the high byte; the 16-bit word is OR-ed in at offset bitstream_bits
+ decoder.bitstream_bits -= 16; // account for the 16 bits just added
}
- *bit_buf |= ((data[0] << 8) | data[1]) << (bits);
+
+ return 1;
}
-static __forceinline void bitstream_init (decoder_t * decoder){
- decoder->bitstream_bits = -16;
+static __forceinline int bitstream_init () // loads the first 32 bits into the decoder working set; returns 1 on success, 0 if they could not be fetched
+{
+ if (!getBits32((u8*)&decoder.bitstream_buf, 1)) // bytes are written straight into bitstream_buf; on failure the bit counter is left untouched
+ {
+ return 0;
+ }
- while( !getBits32(dword, 1) )
- so_resume();
+ decoder.bitstream_bits = -16; // same starting value the removed implementation used
+ BigEndian(decoder.bitstream_buf, decoder.bitstream_buf); // byte-swap in place so the first fetched byte ends up in the most significant position
+ /*decoder.bitstream_buf = *(u64*)dword;
+ BigEndian64(decoder.bitstream_buf, decoder.bitstream_buf);*/
- decoder->bitstream_buf = (dword[0] << 24) | (dword[1] << 16) |
- (dword[2] << 8) |dword[3];
+ return 1;
}
-/* make sure that there are at least 16 valid bits in bit_buf */
-#define NEEDBITS(bit_buf,bits,bit_ptr) \
-do { \
- if (bits > 0) { \
- GETWORD(&bit_buf,bits); \
- bits -= 16; \
- } \
-} while (0)
-
/* remove num valid bits from bit_buf */
-#define DUMPBITS(bit_buf,bits,num) \
-do { \
- /*IPU_LOG("DUMPBITS %d\n",num);*/ \
- bit_buf <<= (num); \
- bits += (num); \
-} while (0)
+static __forceinline void DUMPBITS(int num) // discards the top num bits of the global decoder's working set
+{
+ decoder.bitstream_buf <<= num; // shift the consumed bits out of the high end
+ decoder.bitstream_bits += num; // the counter rises as bits are consumed; GETWORD() refills once it is positive
+}
/* take num bits from the high part of bit_buf and zero extend them */
-#define UBITS(bit_buf,num) (((u32)(bit_buf)) >> (32 - (num)))
+#define UBITS(num) (((u32)decoder.bitstream_buf) >> (32 - (num)))
/* take num bits from the high part of bit_buf and sign extend them */
-#define SBITS(bit_buf,num) (((s32)(bit_buf)) >> (32 - (num)))
+#define SBITS(num) (((s32)decoder.bitstream_buf) >> (32 - (num)))
+
+/* Get bits from bitstream: returns the next num bits zero-extended and removes them from the working set (num must be >= 1, since UBITS shifts by 32 - num) */
+static __forceinline u32 GETBITS(int num)
+{
+ u32 retVal = UBITS(num); // was u16, which silently truncated any field wider than 16 bits despite the u32 return type
+ DUMPBITS(num); // consume the bits only after they have been captured
+
+ return retVal;
+}
struct MBtab {
u8 modes;
@@ -443,4 +458,247 @@ static const MBAtab MBA_11 [] = {
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}
};
+
+// New
+
+
+/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx (read-only lookup data: const, like the other VLC tables in this header) */
+static const MBAtab MBAtab1[16] =
+{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4},
+ {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
+};
+
+/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx (read-only lookup data: const, like the other VLC tables in this header) */
+static const MBAtab MBAtab2[104] =
+{
+ {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
+ {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
+ {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
+ {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8},
+ {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8},
+ {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8},
+ {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8},
+ {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8},
+ {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8},
+ {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
+ {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
+ {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7},
+ {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}
+};
+
+/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
+static const DCtab DClumtab0[32] =
+{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+ {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0}
+};
+
+/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
+static const DCtab DClumtab1[16] =
+{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
+ {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
+};
+
+/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
+static const DCtab DCchromtab0[32] =
+{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0}
+};
+
+/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
+static const DCtab DCchromtab1[32] =
+{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+ {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
+ {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0100 ... 1xxx (used for first (DC) coefficient)
+ */
+static const DCTtab DCTtabfirst[12] =
+{
+ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
+ {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
+ {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0100 ... 1xxx (used for all other coefficients)
+ */
+static const DCTtab DCTtabnext[12] =
+{
+ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
+ {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 000001xx ... 00111xxx
+ */
+static const DCTtab DCTtab0[60] =
+{
+ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
+ {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
+ {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
+ {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
+ {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
+ {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
+ {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
+ {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
+ {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
+ {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
+ {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
+ {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
+ {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
+};
+
+/* Table B-15, DCT coefficients table one,
+ * codes 000001xx ... 11111111
+*/
+static const DCTtab DCTtab0a[252] =
+{
+ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
+ {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
+ {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
+ {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
+ {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
+ {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
+ {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
+ {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
+ {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
+ {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
+ {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
+ {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
+ {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+ {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+ {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
+ {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
+ {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
+ {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
+ {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
+ {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
+ {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
+ {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
+ {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
+};
+
+/* Table B-14, DCT coefficients table zero,
+ * codes 0000001000 ... 0000001111
+ */
+static const DCTtab DCTtab1[8] =
+{
+ {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
+ {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
+};
+
+/* Table B-15, DCT coefficients table one,
+ * codes 000000100x ... 000000111x
+ */
+static const DCTtab DCTtab1a[8] =
+{
+ {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
+ {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 000000010000 ... 000000011111
+ */
+static const DCTtab DCTtab2[16] =
+{
+ {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
+ {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
+ {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
+ {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 0000000010000 ... 0000000011111
+ */
+static const DCTtab DCTtab3[16] =
+{
+ {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
+ {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
+ {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
+ {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 00000000010000 ... 00000000011111
+ */
+static const DCTtab DCTtab4[16] =
+{
+ {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
+ {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
+ {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
+ {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 000000000010000 ... 000000000011111
+ */
+static const DCTtab DCTtab5[16] =
+{
+ {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
+ {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
+ {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
+ {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15}
+};
+
+/* Table B-14/15, DCT coefficients table zero / one,
+ * codes 0000000000010000 ... 0000000000011111
+ */
+static const DCTtab DCTtab6[16] =
+{
+ {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
+ {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
+ {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
+ {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16}
+};
+
#endif//__VLC_H__
diff --git a/pcsx2/NakedAsm.h b/pcsx2/NakedAsm.h
index c14e6e7d6b..0446e243c8 100644
--- a/pcsx2/NakedAsm.h
+++ b/pcsx2/NakedAsm.h
@@ -17,17 +17,6 @@
#ifndef NAKED_ASM_H
#define NAKED_ASM_H
-#include "IPU/coroutine.h"
-
-// Common to Windows and Linux
-extern "C"
-{
- // acoroutine.S
- void so_call(coroutine_t coro);
- void so_resume(void);
- void so_exit(void);
-}
-
#ifdef __LINUX__
extern "C"
diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
index 9c67ac3571..f76dc73072 100644
--- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
+++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
@@ -1254,14 +1254,6 @@
-
-
-
-
@@ -1270,7 +1262,7 @@
>