Added a threadless, state-managed IPU. The code is still in its early stages and will now be worked on to optimize for speed. The first optimization is to increase the read size in Vlc.h from 32 bits to 64 bits.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3568 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
msakhtar 2010-07-26 10:18:28 +00:00
parent 83604ec59d
commit 5165350170
12 changed files with 1329 additions and 1893 deletions

View File

@ -36,12 +36,9 @@
// IPU Inline'd IRQs : Calls the IPU interrupt handlers directly instead of
// feeding them through the EE's branch test. (see IPU.h for details)
static tIPU_DMA g_nDMATransfer(0);
static tIPU_cmd ipu_cmd;
static IPUStatus IPU1Status;
tIPU_DMA g_nDMATransfer(0);
tIPU_cmd ipu_cmd;
IPUStatus IPU1Status;
// FIXME - g_nIPU0Data and Pointer are not saved in the savestate, which breaks savestates for some
// FMVs at random (if they get saved during the half frame of a 30fps rate). The fix is complicated
@ -53,9 +50,6 @@ void ReorderBitstream();
// the BP doesn't advance and returns -1 if there is no data to be read
tIPU_BP g_BP;
static coroutine_t s_routine; // used for executing BDEC/IDEC
static int s_RoutineDone = 0;
static u32 s_tempstack[0x4000]; // 64k
void IPUWorker();
@ -78,7 +72,7 @@ __aligned16 macroblock_rgb16 rgb16;
u8 indx4[16*16/2];
bool mpeg2_inited = false; //mpeg2_idct_init() must be called only once
u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'};
decoder_t g_decoder; //static, only to place it in bss
decoder_t decoder; //static, only to place it in bss
decoder_t tempdec;
extern "C"
@ -98,14 +92,14 @@ __forceinline void IPUProcessInterrupt()
void init_g_decoder()
{
//other stuff
g_decoder.intra_quantizer_matrix = (u8*)iq;
g_decoder.non_intra_quantizer_matrix = (u8*)niq;
g_decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
g_decoder.mb8 = &mb8;
g_decoder.mb16 = &mb16;
g_decoder.rgb32 = &rgb32;
g_decoder.rgb16 = &rgb16;
g_decoder.stride = 16;
decoder.intra_quantizer_matrix = (u8*)iq;
decoder.non_intra_quantizer_matrix = (u8*)niq;
decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
decoder.mb8 = &mb8;
decoder.mb16 = &mb16;
decoder.rgb32 = &rgb32;
decoder.rgb16 = &rgb16;
decoder.stride = 16;
}
void mpeg2_init()
@ -159,7 +153,7 @@ void ReportIPU()
Console.WriteLn("vqclut = 0x%x.", vqclut);
Console.WriteLn("s_thresh = 0x%x.", s_thresh);
Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern);
Console.WriteLn("g_decoder = 0x%x.", g_decoder);
Console.WriteLn("g_decoder = 0x%x.", decoder);
Console.WriteLn("mpeg2: scan_norm = 0x%x, alt = 0x%x.", mpeg2_scan_norm, mpeg2_scan_alt);
Console.WriteLn(ipu_cmd.desc());
Console.WriteLn("_readbits = 0x%x. readbits - _readbits, which is also frozen, is 0x%x.",
@ -186,7 +180,7 @@ void SaveStateBase::ipuFreeze()
Freeze(vqclut);
Freeze(s_thresh);
Freeze(coded_block_pattern);
Freeze(g_decoder);
Freeze(decoder);
Freeze(mpeg2_scan_norm);
Freeze(mpeg2_scan_alt);
@ -377,72 +371,67 @@ static void ipuBCLR(u32 val)
IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP);
}
static BOOL ipuIDEC(u32 val)
static BOOL ipuIDEC(u32 val, bool resume)
{
tIPU_CMD_IDEC idec(val);
idec.log();
g_BP.BP += idec.FB;//skip FB bits
//from IPU_CTRL
ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;)
g_decoder.coding_type = ipuRegs->ctrl.PCT;
g_decoder.mpeg1 = ipuRegs->ctrl.MP1;
g_decoder.q_scale_type = ipuRegs->ctrl.QST;
g_decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
g_decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
g_decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
if (!resume)
{
idec.log();
g_BP.BP += idec.FB;//skip FB bits
//from IPU_CTRL
ipuRegs->ctrl.PCT = I_TYPE; //Intra DECoding;)
decoder.coding_type = ipuRegs->ctrl.PCT;
decoder.mpeg1 = ipuRegs->ctrl.MP1;
decoder.q_scale_type = ipuRegs->ctrl.QST;
decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
//from IDEC value
g_decoder.quantizer_scale = idec.QSC;
g_decoder.frame_pred_frame_dct = !idec.DTD;
g_decoder.sgn = idec.SGN;
g_decoder.dte = idec.DTE;
g_decoder.ofm = idec.OFM;
//from IDEC value
decoder.quantizer_scale = idec.QSC;
decoder.frame_pred_frame_dct = !idec.DTD;
decoder.sgn = idec.SGN;
decoder.dte = idec.DTE;
decoder.ofm = idec.OFM;
//other stuff
g_decoder.dcr = 1; // resets DC prediction value
//other stuff
decoder.dcr = 1; // resets DC prediction value
}
s_routine = so_create(mpeg2sliceIDEC, &s_RoutineDone, s_tempstack, sizeof(s_tempstack));
pxAssert(s_routine != NULL);
so_call(s_routine);
if (s_RoutineDone) s_routine = NULL;
return s_RoutineDone;
return mpeg2sliceIDEC();
}
static int s_bdec = 0;
static __forceinline BOOL ipuBDEC(u32 val)
static __forceinline BOOL ipuBDEC(u32 val, bool resume)
{
tIPU_CMD_BDEC bdec(val);
bdec.log(s_bdec);
if (IsDebugBuild) s_bdec++;
if (!resume)
{
bdec.log(s_bdec);
if (IsDebugBuild) s_bdec++;
g_BP.BP += bdec.FB;//skip FB bits
g_decoder.coding_type = I_TYPE;
g_decoder.mpeg1 = ipuRegs->ctrl.MP1;
g_decoder.q_scale_type = ipuRegs->ctrl.QST;
g_decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
g_decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
g_decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
g_BP.BP += bdec.FB;//skip FB bits
decoder.coding_type = I_TYPE;
decoder.mpeg1 = ipuRegs->ctrl.MP1;
decoder.q_scale_type = ipuRegs->ctrl.QST;
decoder.intra_vlc_format = ipuRegs->ctrl.IVF;
decoder.scan = ipuRegs->ctrl.AS ? mpeg2_scan_alt : mpeg2_scan_norm;
decoder.intra_dc_precision = ipuRegs->ctrl.IDP;
//from BDEC value
/* JayteeMaster: the quantizer (linear/non linear) depends on the q_scale_type */
g_decoder.quantizer_scale = g_decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
g_decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
g_decoder.dcr = bdec.DCR;
g_decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
//from BDEC value
decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
decoder.dcr = bdec.DCR;
decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
memzero(mb8);
memzero(mb16);
memzero(mb8);
memzero(mb16);
}
s_routine = so_create(mpeg2_slice, &s_RoutineDone, s_tempstack, sizeof(s_tempstack));
pxAssert(s_routine != NULL);
so_call(s_routine);
if (s_RoutineDone) s_routine = NULL;
return s_RoutineDone;
return mpeg2_slice();
}
static BOOL __fastcall ipuVDEC(u32 val)
@ -451,34 +440,34 @@ static BOOL __fastcall ipuVDEC(u32 val)
{
case 0:
ipuRegs->cmd.DATA = 0;
if (!getBits32((u8*)&g_decoder.bitstream_buf, 0)) return FALSE;
if (!getBits32((u8*)&decoder.bitstream_buf, 0)) return FALSE;
g_decoder.bitstream_bits = -16;
BigEndian(g_decoder.bitstream_buf, g_decoder.bitstream_buf);
decoder.bitstream_bits = -16;
BigEndian(decoder.bitstream_buf, decoder.bitstream_buf);
switch ((val >> 26) & 3)
{
case 0://Macroblock Address Increment
g_decoder.mpeg1 = ipuRegs->ctrl.MP1;
ipuRegs->cmd.DATA = get_macroblock_address_increment(&g_decoder);
decoder.mpeg1 = ipuRegs->ctrl.MP1;
ipuRegs->cmd.DATA = get_macroblock_address_increment();
break;
case 1://Macroblock Type //known issues: no error detected
g_decoder.frame_pred_frame_dct = 1;//prevent DCT_TYPE_INTERLACED
g_decoder.coding_type = ipuRegs->ctrl.PCT;
ipuRegs->cmd.DATA = get_macroblock_modes(&g_decoder);
case 1://Macroblock Type
decoder.frame_pred_frame_dct = 1;
decoder.coding_type = ipuRegs->ctrl.PCT;
ipuRegs->cmd.DATA = get_macroblock_modes();
break;
case 2://Motion Code //known issues: no error detected
ipuRegs->cmd.DATA = get_motion_delta(&g_decoder, 0);
case 2://Motion Code
ipuRegs->cmd.DATA = get_motion_delta(0);
break;
case 3://DMVector
ipuRegs->cmd.DATA = get_dmv(&g_decoder);
ipuRegs->cmd.DATA = get_dmv();
break;
}
g_BP.BP += (g_decoder.bitstream_bits + 16);
g_BP.BP += (int)decoder.bitstream_bits + 16;
if ((int)g_BP.BP < 0)
{
@ -486,9 +475,7 @@ static BOOL __fastcall ipuVDEC(u32 val)
ReorderBitstream();
}
FillInternalBuffer(&g_BP.BP, 1, 0);
ipuRegs->cmd.DATA = (ipuRegs->cmd.DATA & 0xFFFF) | ((g_decoder.bitstream_bits + 16) << 16);
ipuRegs->cmd.DATA = (ipuRegs->cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16);
ipuRegs->ctrl.ECD = (ipuRegs->cmd.DATA == 0);
case 1:
@ -529,7 +516,10 @@ static BOOL ipuSETIQ(u32 val)
if ((val >> 27) & 1)
{
ipu_cmd.pos[0] += getBits((u8*)niq + ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1); // 8*8*8
for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
{
if (!getBits64((u8*)niq + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
IPU_LOG("Read non-intra quantization matrix from IPU FIFO.");
for (i = 0; i < 8; i++)
@ -541,7 +531,10 @@ static BOOL ipuSETIQ(u32 val)
}
else
{
ipu_cmd.pos[0] += getBits((u8*)iq + 8 * ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1);
for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
{
if (!getBits64((u8*)iq + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
IPU_LOG("Read intra quantization matrix from IPU FIFO.");
for (i = 0; i < 8; i++)
@ -552,40 +545,40 @@ static BOOL ipuSETIQ(u32 val)
}
}
return ipu_cmd.pos[0] == 64;
return TRUE;
}
static BOOL ipuSETVQ(u32 val)
{
ipu_cmd.pos[0] += getBits((u8*)vqclut + ipu_cmd.pos[0], 256 - 8 * ipu_cmd.pos[0], 1); // 16*2*8
if (ipu_cmd.pos[0] == 32)
for(;ipu_cmd.pos[0] < 4; ipu_cmd.pos[0]++)
{
IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO.");
IPU_LOG(
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
if (!getBits64((u8*)vqclut + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
return ipu_cmd.pos[0] == 32;
IPU_LOG("IPU SETVQ command.\nRead VQCLUT table from IPU FIFO.");
IPU_LOG(
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d"
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
"%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
return TRUE;
}
// IPU Transfers are split into 8Qwords so we need to send ALL the data
@ -596,17 +589,14 @@ static BOOL __fastcall ipuCSC(u32 val)
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
{
if (ipu_cmd.pos[0] < 3072 / 8)
for(;ipu_cmd.pos[0] < 48; ipu_cmd.pos[0]++)
{
ipu_cmd.pos[0] += getBits((u8*) & mb8 + ipu_cmd.pos[0], 3072 - 8 * ipu_cmd.pos[0], 1);
if (ipu_cmd.pos[0] < 3072 / 8) return FALSE;
ipu_csc(&mb8, &rgb32, 0);
if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE);
if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
ipu_csc(&mb8, &rgb32, 0);
if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE);
if (csc.OFM)
{
while (ipu_cmd.pos[1] < 32)
@ -641,18 +631,16 @@ static BOOL ipuPACK(u32 val)
for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
{
if (ipu_cmd.pos[0] < 512)
for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
{
ipu_cmd.pos[0] += getBits((u8*) & mb8 + ipu_cmd.pos[0], 512 - 8 * ipu_cmd.pos[0], 1);
if (ipu_cmd.pos[0] < 64) return FALSE;
ipu_csc(&mb8, &rgb32, 0);
ipu_dither(&rgb32, &rgb16, csc.DTE);
if (csc.OFM) ipu_vq(&rgb16, indx4);
if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
ipu_csc(&mb8, &rgb32, 0);
ipu_dither(&rgb32, &rgb16, csc.DTE);
if (csc.OFM) ipu_vq(&rgb16, indx4);
if (csc.OFM)
{
ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
@ -696,7 +684,7 @@ void IPUCMD_WRITE(u32 val)
ipuRegs->ctrl.ECD = 0;
ipuRegs->ctrl.SCD = 0; //clear ECD/SCD
ipuRegs->cmd.DATA = val;
ipu_cmd.pos[0] = 0;
ipu_cmd.clear();
switch (ipuRegs->cmd.CMD)
{
@ -759,29 +747,27 @@ void IPUCMD_WRITE(u32 val)
break;
case SCE_IPU_IDEC:
if (ipuIDEC(val))
if (ipuIDEC(val, false))
{
// idec done, ipu0 done too
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
return;
}
ipuRegs->topbusy = 0x80000000;
// have to resort to the thread
ipu_cmd.current = val >> 28;
ipuRegs->ctrl.BUSY = 1;
return;
break;
case SCE_IPU_BDEC:
if (ipuBDEC(val))
if (ipuBDEC(val, false))
{
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
}
ipuRegs->topbusy = 0x80000000;
ipu_cmd.current = val >> 28;
ipuRegs->ctrl.BUSY = 1;
return;
else
{
ipuRegs->topbusy = 0x80000000;
}
}
// have to resort to the thread
@ -850,8 +836,7 @@ void IPUWorker()
break;
case SCE_IPU_IDEC:
so_call(s_routine);
if (!s_RoutineDone)
if (!ipuIDEC(ipuRegs->cmd.DATA, true))
{
if(ipu1dma->chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
@ -865,12 +850,10 @@ void IPUWorker()
// CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
s_routine = NULL;
break;
case SCE_IPU_BDEC:
so_call(s_routine);
if (!s_RoutineDone)
if (!ipuBDEC(ipuRegs->cmd.DATA, true))
{
if(ipu1dma->chcr.STR == false) hwIntcIrq(INTC_IPU);
return;
@ -882,7 +865,6 @@ void IPUWorker()
ipu_cmd.current = 0xffffffff;
if (ipu0dma->qwc > 0 && ipu0dma->chcr.STR) IPU_INT0_FROM();
s_routine = NULL;
if (ipuRegs->ctrl.SCD || ipuRegs->ctrl.ECD) hwIntcIrq(INTC_IPU);
return;
@ -946,7 +928,7 @@ u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size)
g_BP.FP = 1;
}
if ((g_BP.FP < 2) && (*(int*)pointer + size) >= 128)
if ((g_BP.FP < 2) && ((*(int*)pointer + size) >= 128))
{
if (ipu_fifo.in.read(next_readbits())) g_BP.FP += 1;
}
@ -967,6 +949,83 @@ u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size)
return (g_BP.FP >= 1) ? g_BP.FP * 128 - (*(int*)pointer) : 0;
}
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
// Reads 128 bits from the IPU input buffer, starting at bit position g_BP.BP,
// and stores them as 16 bytes at `address`.
//
//   address - destination; must have room for 16 bytes.
//   advance - when nonzero, g_BP.BP is moved forward by 128 after the read.
//
// Returns 1 on success, or 0 (writing nothing) when FillInternalBuffer reports
// that fewer than 128 bits are available.
//
// When BP is not byte aligned, every output byte i is assembled as
//   (src[i] & mask) << shift  |  (src[i + 1] & ~mask) >> (8 - shift)
// where mask keeps the low (8 - shift) bits of a byte. Because each byte is
// masked before it is shifted, no bit ever crosses a byte boundary, so the two
// 64-bit halves can be processed independently (exactly as getBits64 does).
// The previous version additionally OR'ed "carry" bits from the high half into
// the low half; for shift >= 5 that term was non-zero and corrupted the top
// byte of the low half.
u8 __fastcall getBits128(u8 *address, u32 advance)
{
	u64 mask2;
	u128 mask;
	u32 shift;
	u8* readpos;

	// Check that 128 bits can be served from the current BP
	if (FillInternalBuffer(&g_BP.BP, 1, 128) < 128) return 0;

	readpos = readbits + (int)g_BP.BP / 8;

	if (g_BP.BP & 7)
	{
		shift = g_BP.BP & 7;

		// Replicate the per-byte mask across all eight bytes of each half.
		mask2 = 0xff >> shift;
		mask.lo = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
		mask.hi = mask.lo;

		// `current` starts at the byte containing BP, `next` one byte later.
		u128 current = *(u128*)readpos;
		u128 next = *(u128*)(readpos + 1);

		u128 result;
		result.lo = ((current.lo & mask.lo) << shift) | ((next.lo & ~mask.lo) >> (8 - shift));
		result.hi = ((current.hi & mask.hi) << shift) | ((next.hi & ~mask.hi) >> (8 - shift));

		*(u128*)address = result;
	}
	else
	{
		// Byte aligned: straight 16 byte copy.
		*(u128*)address = *(u128*)readpos;
	}

	if (advance) g_BP.BP += 128;

	return 1;
}
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
// Reads 64 bits from the IPU input buffer, starting at bit position g_BP.BP,
// and stores them as 8 bytes at `address`. Returns 0 without writing anything
// when FillInternalBuffer reports fewer than 64 bits available, 1 otherwise.
// g_BP.BP is advanced by 64 only when `advance` is nonzero.
u8 __fastcall getBits64(u8 *address, u32 advance)
{
	// Nothing is read unless a full 64 bits can be served from the current BP.
	if (FillInternalBuffer(&g_BP.BP, 1, 64) < 64) return 0;

	u8* const src = readbits + (int)g_BP.BP / 8;
	const int bitOffset = g_BP.BP & 7;

	if (bitOffset == 0)
	{
		// Byte aligned: plain 8 byte copy.
		*(u64*)address = *(u64*)src;
	}
	else
	{
		// Per-byte mask keeping the low (8 - bitOffset) bits, replicated into
		// all eight byte lanes by repeated doubling.
		u64 lowBits = (0xff >> bitOffset);
		lowBits |= lowBits << 8;
		lowBits |= lowBits << 16;
		lowBits |= lowBits << 32;

		const u64 current = *(u64*)src;
		const u64 following = *(u64*)(src + 1);

		// Each output byte takes its high bits from the current source byte
		// and its low bits from the byte that follows it.
		*(u64*)address = ((~lowBits & following) >> (8 - bitOffset)) | ((lowBits & current) << bitOffset);
	}

	if (advance) g_BP.BP += 64;

	return 1;
}
// whenever reading fractions of bytes. The low bits always come from the next byte
// while the high bits come from the current byte
u8 __fastcall getBits32(u8 *address, u32 advance)
@ -1053,102 +1112,6 @@ u8 __fastcall getBits8(u8 *address, u32 advance)
return 1;
}
// Generic variable-length reader: copies up to `size` BITS from the IPU input
// buffer (starting at bit position g_BP.BP) into `address`.
//   address - destination byte buffer.
//   size    - number of bits requested.
//   advance - when nonzero the consumed position is written back to g_BP.BP;
//             when zero g_BP.BP is left alone and `readbits` is restored.
// Returns the number of BYTES stored at `address` (address - oldaddr), which is
// less than size/8 when FillInternalBuffer runs out of data part-way through.
// Returns 0 immediately if not even 8 bits are available.
int __fastcall getBits(u8 *address, u32 size, u32 advance)
{
register u32 mask = 0, shift = 0, howmuch;
u8* oldbits, *oldaddr = address;
u32 pointer = 0, temp;
// Check if the current BP has exceeded or reached the limit of 128
if (FillInternalBuffer(&g_BP.BP, 1, 8) < 8) return 0;
oldbits = readbits;
// Backup the current BP in case of VDEC/FDEC
pointer = g_BP.BP;
// Unaligned case: the read position is not on a byte boundary, so every
// destination byte is assembled from pieces of source bytes.
if (pointer & 7)
{
// Pre-decrement so the "*++address" below lands on the first output byte.
address--;
while (size)
{
// `shift` counts the bits still free in the current output byte;
// zero means a new (cleared) output byte has to be started.
if (shift == 0)
{
*++address = 0;
shift = 8;
}
temp = shift; // Lets not pass a register to min.
// Take the smallest of: bits left in the current source byte, bits left
// before position 128, bits still requested, and bits free in the output byte.
howmuch = min(min(8 - (pointer & 7), 128 - pointer), min(size, temp));
if (FillInternalBuffer(&pointer, advance, 8) < 8)
{
// Out of data: report the bytes started so far.
if (advance) g_BP.BP = pointer;
return address - oldaddr;
}
// Isolate `howmuch` bits of the source byte starting at bit (pointer & 7),
// counted from the most significant bit, and right-justify them.
mask = ((0xFF >> (pointer & 7)) << (8 - howmuch - (pointer & 7))) & 0xFF;
mask &= readbits[((pointer) >> 3)];
mask >>= 8 - howmuch - (pointer & 7);
pointer += howmuch;
size -= howmuch;
shift -= howmuch;
// Drop the extracted bits into the next free (highest) slot of the output byte.
*address |= mask << shift;
}
++address;
}
else
{
// Aligned case: whole bytes can be copied directly.
u8* readmem;
while (size)
{
if (FillInternalBuffer(&pointer, advance, 8) < 8)
{
if (advance) g_BP.BP = pointer;
return address -oldaddr;
}
// Never read past bit position 128 in one pass.
howmuch = min(128 - pointer, size);
size -= howmuch;
readmem = readbits + (pointer >> 3);
pointer += howmuch;
// From here on `howmuch` is a byte count rather than a bit count.
howmuch >>= 3;
// Bulk copy four bytes at a time.
while (howmuch >= 4)
{
*(u32*)address = *(u32*)readmem;
howmuch -= 4;
address += 4;
readmem += 4;
}
// Copy the 0-3 remaining bytes; the cases fall through on purpose.
switch (howmuch)
{
case 3:
address[2] = readmem[2];
case 2:
address[1] = readmem[1];
case 1:
address[0] = readmem[0];
case 0:
break;
// NOTE(review): jNO_DEFAULT is a project macro defined outside this view;
// presumably it marks the default case as unreachable - confirm.
jNO_DEFAULT
}
address += howmuch;
}
}
// If not advance then reset the Reading buffer value
if (advance)
g_BP.BP = pointer;
else
readbits = oldbits; // restore the last pointer
return address - oldaddr;
}
///////////////////// CORE FUNCTIONS /////////////////
void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V,
const int Src_BpS, const int Width, const int Height);
@ -1244,7 +1207,7 @@ static __forceinline void ipuDmacSrcChain()
{
case TAG_REFE: // refe
//if(IPU1Status.InProgress == false) ipu1dma->tadr += 16;
if(IPU1Status.DMAFinished == false) IPU1Status.DMAFinished = true;
IPU1Status.DMAFinished = true;
break;
case TAG_CNT: // cnt
// Set the taddr to the next tag
@ -1264,7 +1227,7 @@ static __forceinline void ipuDmacSrcChain()
case TAG_END: // end
ipu1dma->tadr = ipu1dma->madr;
if(IPU1Status.DMAFinished == false) IPU1Status.DMAFinished = true;
IPU1Status.DMAFinished = true;
break;
}
}
@ -1300,7 +1263,6 @@ static __forceinline int IPU1chain() {
if (ipu1dma->qwc > 0 && IPU1Status.InProgress == true)
{
int qwc = ipu1dma->qwc;
u32 *pMem;
@ -1308,7 +1270,8 @@ static __forceinline int IPU1chain() {
if (pMem == NULL)
{
Console.Error("ipu1dma NULL!"); return totalqwc;
Console.Error("ipu1dma NULL!");
return totalqwc;
}
//Write our data to the fifo
@ -1484,7 +1447,6 @@ int IPU1dma()
}
else
{
IPU_LOG("Here");
cpuRegs.eCycle[4] = 0x9999;//IPU_INT_TO(2048);
}
@ -1601,7 +1563,6 @@ __forceinline void dmaIPU1() // toIPU
IPU1Status.DMAMode = DMA_MODE_CHAIN;
IPU1dma();
//if (ipuRegs->ctrl.BUSY) IPUWorker();
}
else //Normal Mode
{
@ -1623,7 +1584,6 @@ __forceinline void dmaIPU1() // toIPU
IPU1Status.DMAFinished = true;
IPU1Status.DMAMode = DMA_MODE_NORMAL;
IPU1dma();
//if (ipuRegs->ctrl.BUSY) IPUWorker();
}
}
}

View File

@ -17,7 +17,6 @@
#define __IPU_H__
#include "mpeg2lib/Mpeg.h"
#include "coroutine.h"
#include "IPU_Fifo.h"
#ifdef _MSC_VER
@ -327,7 +326,7 @@ struct IPUregisters {
struct tIPU_cmd
{
int index;
int pos[2];
int pos[6];
int current;
void clear()
{
@ -342,12 +341,13 @@ struct tIPU_cmd
}
};
//extern tIPU_cmd ipu_cmd;
extern tIPU_cmd ipu_cmd;
extern tIPU_BP g_BP;
extern int coded_block_pattern;
extern int g_nIPU0Data; // or 0x80000000 whenever transferring
extern u8* g_pIPU0Pointer;
extern IPUStatus IPU1Status;
extern tIPU_DMA g_nDMATransfer;
// The IPU can only do one task at once and never uses other buffers so these
// should be made available to functions in other modules to save registers.
extern __aligned16 macroblock_rgb32 rgb32;
@ -376,10 +376,11 @@ extern int IPU0dma();
extern int IPU1dma();
extern u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size);
extern u8 __fastcall getBits128(u8 *address, u32 advance);
extern u8 __fastcall getBits64(u8 *address, u32 advance);
extern u8 __fastcall getBits32(u8 *address, u32 advance);
extern u8 __fastcall getBits16(u8 *address, u32 advance);
extern u8 __fastcall getBits8(u8 *address, u32 advance);
extern int __fastcall getBits(u8 *address, u32 size, u32 advance);
#endif

View File

@ -13,7 +13,6 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "Common.h"
#include "IPU_Fifo.h"
@ -106,20 +105,18 @@ int IPU_Fifo_Output::write(const u32 *value, int size)
ipuRegs->ctrl.OFC += firsttrans;
IPU0dma();
//Console.WriteLn("Written %d qwords, %d", firsttrans,ipuRegs->ctrl.OFC);
return firsttrans;
}
int IPU_Fifo_Input::read(void *value)
{
// wait until enough data
if (g_BP.IFC < 8)
// wait until enough data to ensure proper streaming.
if (g_BP.IFC < 4)
{
// IPU FIFO is empty and DMA is waiting so lets tell the DMA we are ready to put data in the FIFO
if(cpuRegs.eCycle[4] == 0x9999)
{
//DevCon.Warning("Setting ECycle");
CPU_INT( DMAC_TO_IPU, 4 );
}

View File

@ -1,78 +0,0 @@
.intel_syntax noprefix
.extern g_pCurrentRoutine
.globl so_call
so_call:
mov eax, dword ptr [esp+4]
test dword ptr [eax+24], 1
jnz RestoreRegs
mov [eax+8], ebx
mov [eax+12], esi
mov [eax+16], edi
mov [eax+20], ebp
mov dword ptr [eax+24], 1
jmp CallFn
RestoreRegs:
// have to load and save at the same time
mov ecx, [eax+8]
mov edx, [eax+12]
mov [eax+8], ebx
mov [eax+12], esi
mov ebx, ecx
mov esi, edx
mov ecx, [eax+16]
mov edx, [eax+20]
mov [eax+16], edi
mov [eax+20], ebp
mov edi, ecx
mov ebp, edx
CallFn:
mov [g_pCurrentRoutine], eax
mov ecx, esp
mov esp, [eax+4]
mov [eax+4], ecx
jmp dword ptr [eax]
.globl so_resume
so_resume:
mov eax, [g_pCurrentRoutine]
mov ecx, [eax+8]
mov edx, [eax+12]
mov [eax+8], ebx
mov [eax+12], esi
mov ebx, ecx
mov esi, edx
mov ecx, [eax+16]
mov edx, [eax+20]
mov [eax+16], edi
mov [eax+20], ebp
mov edi, ecx
mov ebp, edx
// put the return address in pcalladdr
mov ecx, [esp]
mov [eax], ecx
add esp, 4 // remove the return address
// swap stack pointers
mov ecx, [eax+4]
mov [eax+4], esp
mov esp, ecx
ret
.globl so_exit
so_exit:
mov eax, [g_pCurrentRoutine]
mov esp, [eax+4]
mov ebx, [eax+8]
mov esi, [eax+12]
mov edi, [eax+16]
mov ebp, [eax+20]
ret
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -1,140 +0,0 @@
; Pcsx2 - Pc Ps2 Emulator
; Copyright (C) 2002-2008 Pcsx2 Team
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
;; x86-64 coroutine functions
extern g_pCurrentRoutine:ptr
.code
so_call proc public
test dword ptr [rcx+88], 1
jnz so_call_RestoreRegs
mov [rcx+24], rbp
mov [rcx+16], rbx
mov [rcx+32], r12
mov [rcx+40], r13
mov [rcx+48], r14
mov [rcx+56], r15
mov [rcx+64], rsi
mov [rcx+72], rdi
mov dword ptr [rcx+88], 1
jmp so_call_CallFn
so_call_RestoreRegs:
;; have to load and save at the same time
;; rbp, rbx, r12
mov rax, [rcx+24]
mov r8, [rcx+16]
mov rdx, [rcx+32]
mov [rcx+24], rbp
mov [rcx+16], rbx
mov [rcx+32], r12
mov rbp, rax
mov rbx, r8
mov r12, rdx
;; r13, r14, r15
mov rax, [rcx+40]
mov r8, [rcx+48]
mov rdx, [rcx+56]
mov [rcx+40], r13
mov [rcx+48], r14
mov [rcx+56], r15
mov r13, rax
mov r14, r8
mov r15, rdx
;; rsi, rdi
mov rax, [rcx+64]
mov rdx, [rcx+72]
mov [rcx+64], rsi
mov [rcx+72], rdi
mov rsi, rax
mov rdi, rdx
so_call_CallFn:
mov [g_pCurrentRoutine], rcx
;; swap the stack
mov rax, [rcx+8]
mov [rcx+8], rsp
mov rsp, rax
mov rax, [rcx+0]
mov rcx, [rcx+80]
jmp rax
so_call endp
; so_resume
so_resume proc public
;; rbp, rbx, r12
mov rcx, [g_pCurrentRoutine]
mov rax, [rcx+24]
mov r8, [rcx+16]
mov rdx, [rcx+32]
mov [rcx+24], rbp
mov [rcx+16], rbx
mov [rcx+32], r12
mov rbp, rax
mov rbx, r8
mov r12, rdx
;; r13, r14, r15
mov rax, [rcx+40]
mov r8, [rcx+48]
mov rdx, [rcx+56]
mov [rcx+40], r13
mov [rcx+48], r14
mov [rcx+56], r15
mov r13, rax
mov r14, r8
mov r15, rdx
;; rsi, rdi
mov rax, [rcx+64]
mov rdx, [rcx+72]
mov [rcx+64], rsi
mov [rcx+72], rdi
mov rsi, rax
mov rdi, rdx
;; put the return address in pcalladdr
mov rax, [rsp]
mov [rcx], rax
add rsp, 8 ;; remove the return address
;; swap stack pointers
mov rax, [rcx+8]
mov [rcx+8], rsp
mov rsp, rax
ret
so_resume endp
so_exit proc public
mov rcx, [g_pCurrentRoutine]
mov rsp, [rcx+8]
mov rbp, [rcx+24]
mov rbx, [rcx+16]
mov r12, [rcx+32]
mov r13, [rcx+40]
mov r14, [rcx+48]
mov r15, [rcx+56]
mov rsi, [rcx+64]
mov rdi, [rcx+72]
ret
so_exit endp
end

View File

@ -1,153 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "coroutine.h"
// Control block for one coroutine; so_create places it at the start of the
// coroutine's stack memory.
// The inline asm in this file (so_call / so_resume / so_exit) addresses the
// fields by fixed byte offset (0, 4, 8, 12, 16, 20, 24), so the order and size
// of the members must not be changed without updating the asm as well.
struct coroutine {
void* pcalladdr; // address so_call jumps to; so_resume overwrites it with its own return address
void *pcurstack; // stack pointer of the side that is currently NOT running; swapped with esp on each switch
uptr storeebx, storeesi, storeedi, storeebp; // register values of the side that is currently NOT running
s32 restore; // if nonzero, restore the registers
s32 alloc; // byte size malloc'ed by so_create, or 0 when the caller supplied the stack; so_delete frees only when nonzero
//struct s_coroutine *caller;
//struct s_coroutine *restarget;
};
#define CO_STK_ALIGN 256
#define CO_STK_COROSIZE ((sizeof(coroutine) + CO_STK_ALIGN - 1) & ~(CO_STK_ALIGN - 1))
#define CO_MIN_SIZE (4 * 1024)
coroutine* g_pCurrentRoutine;
// Creates a coroutine that will begin executing `func` the first time it is
// passed to so_call.
//
//   func  - entry point of the coroutine.
//   data  - value stored just above the initial top-of-stack slot, where `func`
//           finds it as its argument.
//   stack - memory to use for the control block and the stack, or NULL to have
//           it malloc'ed (release with so_delete in that case).
//   size  - size of `stack` in bytes; rounded down to a multiple of 4.
//
// Returns the coroutine handle, or NULL if size is below CO_MIN_SIZE or the
// allocation fails.
//
// The control block is fully initialised here. Previously `restore` and the
// saved-register slots were left untouched, so they held garbage for a
// malloc'ed stack, or stale values when a stack buffer was reused, even though
// so_call tests `restore` to decide whether to reload registers.
coroutine_t so_create(void (*func)(void *), void *data, void *stack, int size)
{
	void* endstack;
	int alloc = 0;
	coroutine *co;

	// Round down to a multiple of sizeof(s32) and reject stacks that are too small.
	if ((size &= ~(sizeof(s32) - 1)) < CO_MIN_SIZE) return NULL;

	if (!stack) {
		// Make room for the control block and round up to CO_STK_ALIGN.
		size = (size + sizeof(coroutine) + CO_STK_ALIGN - 1) & ~(CO_STK_ALIGN - 1);
		stack = malloc(size);
		if (!stack) return NULL;
		alloc = size;
	}

	// The control block lives at the bottom of the memory, the stack grows
	// down from 64 bytes below the top.
	endstack = (char*)stack + size - 64;
	co = (coroutine*)stack;

	// Initial frame seen by func: a NULL return-address slot followed by `data`.
	*(void**)endstack = NULL;
	*(void**)((char*)endstack+sizeof(void*)) = data;

	co->alloc = alloc;
	co->pcalladdr = (void*)func;
	co->pcurstack = endstack;

	// Start from a clean state: nothing saved yet, so so_call must not restore.
	co->storeebx = co->storeesi = co->storeedi = co->storeebp = 0;
	co->restore = 0;

	return co;
}
// Releases a coroutine created by so_create. Only memory that so_create itself
// malloc'ed (alloc != 0) is freed; a caller-supplied stack is left untouched.
void so_delete(coroutine_t coro)
{
	coroutine* const co = static_cast<coroutine*>(coro);
	pxAssert( co != NULL );

	// Caller-owned stack: nothing for us to release.
	if (!co->alloc) return;

	free(co);
}
// see acoroutines.S and acoroutines.asm for other asm implementations
#if defined(_MSC_VER)
// Switches from the caller into the coroutine `coro` (32-bit MSVC version).
// Saves the caller's ebx/esi/edi/ebp and stack pointer in the control block,
// loads the coroutine's stack pointer and jumps to co->pcalladdr (the entry
// point on the first call, or the address recorded by so_resume afterwards).
// Control comes back to the caller when the coroutine runs so_resume or so_exit.
__declspec(naked) void so_call(coroutine_t coro)
{
__asm {
// eax = coro (first stack argument; naked function, no prologue)
mov eax, dword ptr [esp+4]
// co->restore set? then the block already holds coroutine registers to reload
test dword ptr [eax+24], 1
jnz RestoreRegs
// first entry: only save the caller's registers and mark the block as used
mov [eax+8], ebx
mov [eax+12], esi
mov [eax+16], edi
mov [eax+20], ebp
mov dword ptr [eax+24], 1
jmp CallFn
RestoreRegs:
// have to load and save at the same time
// swap ebx/esi with the stored values, using ecx/edx as scratch
mov ecx, [eax+8]
mov edx, [eax+12]
mov [eax+8], ebx
mov [eax+12], esi
mov ebx, ecx
mov esi, edx
// swap edi/ebp the same way
mov ecx, [eax+16]
mov edx, [eax+20]
mov [eax+16], edi
mov [eax+20], ebp
mov edi, ecx
mov ebp, edx
CallFn:
// remember which coroutine is running for so_resume / so_exit
mov [g_pCurrentRoutine], eax
// swap stack pointers: esp <-> co->pcurstack
mov ecx, esp
mov esp, [eax+4]
mov [eax+4], ecx
// continue at co->pcalladdr on the coroutine's stack
jmp dword ptr [eax]
}
}
// Called from inside the running coroutine to yield back to whoever called
// so_call (32-bit MSVC version). Swaps ebx/esi/edi/ebp with the values stored
// in the control block, records its own return address in co->pcalladdr so the
// next so_call continues right after this call, swaps the stack pointers and
// returns on the so_call caller's stack.
__declspec(naked) void so_resume(void)
{
__asm {
// eax = control block of the coroutine that is currently running
mov eax, [g_pCurrentRoutine]
// swap ebx/esi with the stored values, using ecx/edx as scratch
mov ecx, [eax+8]
mov edx, [eax+12]
mov [eax+8], ebx
mov [eax+12], esi
mov ebx, ecx
mov esi, edx
// swap edi/ebp the same way
mov ecx, [eax+16]
mov edx, [eax+20]
mov [eax+16], edi
mov [eax+20], ebp
mov edi, ecx
mov ebp, edx
// put the return address in pcalladdr
mov ecx, [esp]
mov [eax], ecx
add esp, 4 // remove the return address
// swap stack pointers
mov ecx, [eax+4]
mov [eax+4], esp
mov esp, ecx
// the saved stack still has so_call's return address on top, so this
// returns to the code that invoked so_call
ret
}
}
// Leaves the running coroutine for good (32-bit MSVC version): reloads the
// stack pointer and ebx/esi/edi/ebp that so_call stored in the control block
// and returns to so_call's caller. Nothing of the coroutine's own state is
// written back, so it cannot be continued afterwards.
// NOTE(review): presumably the coroutine body is expected to end by calling
// this, since so_create plants a NULL return address - confirm with callers.
__declspec(naked) void so_exit(void)
{
__asm {
// eax = control block of the coroutine that is currently running
mov eax, [g_pCurrentRoutine]
// back onto the stack saved by so_call (its return address is on top)
mov esp, [eax+4]
// reload the registers saved for the so_call caller
mov ebx, [eax+8]
mov esi, [eax+12]
mov edi, [eax+16]
mov ebp, [eax+20]
ret
}
}
#endif

View File

@ -1,27 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef PCSX2_COROUTINE_LIB
#define PCSX2_COROUTINE_LIB
// low level coroutine library
typedef void *coroutine_t;
coroutine_t so_create(void (*func)(void *), void *data, void *stack, int size);
void so_delete(coroutine_t coro);
#include "NakedAsm.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -99,7 +99,6 @@ struct decoder_t {
/* bit parsing stuff */
u32 bitstream_buf; /* current 32 bit working set */
int bitstream_bits; /* used bits in working set */
u8 * bitstream_ptr; /* buffer with stream data; 128 bits buffer */
struct macroblock_8 *mb8;
struct macroblock_16 *mb16;
@ -173,13 +172,13 @@ extern void (__fastcall *mpeg2_idct_add) (int last, s16 * block, s16* dest, int
#define IDEC 0
#define BDEC 1
void mpeg2sliceIDEC(void* pdone);
void mpeg2_slice(void* pdone);
int get_macroblock_address_increment(decoder_t * const decoder);
int get_macroblock_modes (decoder_t * const decoder);
bool mpeg2sliceIDEC();
bool mpeg2_slice();
int get_macroblock_address_increment();
int get_macroblock_modes();
extern int get_motion_delta (decoder_t * const decoder, const int f_code);
extern int get_dmv (decoder_t * const decoder);
extern int get_motion_delta(const int f_code);
extern int get_dmv();
extern int non_linear_quantizer_scale[];
extern decoder_t g_decoder;
@ -189,7 +188,7 @@ void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb1
void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4);
void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16);
int slice (decoder_t * const decoder, u8 * buffer);
int slice (u8 * buffer);
/* idct.c */
void mpeg2_idct_init ();
@ -199,4 +198,10 @@ void mpeg2_idct_init ();
#define BigEndian(out, in) out = __builtin_bswap32(in) // or we could use the asm function bswap...
#endif
/*
 * BigEndian64(out, in): store the byte-swapped 64-bit value of `in` into `out`.
 * The swap is unconditional (it does not test host endianness).
 * The destination and the whole expansion are parenthesized so the macro stays
 * a single assignment expression when it is used inside a larger expression.
 */
#ifdef _MSC_VER
#define BigEndian64(out, in) ((out) = _byteswap_uint64(in))
#else
#define BigEndian64(out, in) ((out) = __builtin_bswap64(in)) // or we could use the asm function bswap...
#endif
#endif//__MPEG_H__

View File

@ -25,55 +25,70 @@
#ifndef __VLC_H__
#define __VLC_H__
#include "IPU/coroutine.h"
static u8 data[2];
static u8 dword[4];
//static u8 word[4];
//static u8 dword[8];
//static u8 qword[16];
extern tIPU_BP g_BP;
extern decoder_t g_decoder;
extern decoder_t decoder;
extern void ReorderBitstream();
static __forceinline void GETWORD(u32 * bit_buf,int bits)
static __forceinline int GETWORD()
{
while(!getBits16(data,1))
if (decoder.bitstream_bits > 0)
{
so_resume();
if(!getBits16(data,1))
{
return 0;
}
/*u32 data;
BigEndian(data, *(u32*)word);
decoder.bitstream_buf |= (u64)data << decoder.bitstream_bits;
decoder.bitstream_bits -= 32;*/
decoder.bitstream_buf |= ((u32)(((u16)data[0] << 8) | data[1])) << decoder.bitstream_bits;
decoder.bitstream_bits -= 16;
}
*bit_buf |= ((data[0] << 8) | data[1]) << (bits);
return 1;
}
static __forceinline void bitstream_init (decoder_t * decoder){
decoder->bitstream_bits = -16;
static __forceinline int bitstream_init ()
{
if (!getBits32((u8*)&decoder.bitstream_buf, 1))
{
return 0;
}
while( !getBits32(dword, 1) )
so_resume();
decoder.bitstream_bits = -16;
BigEndian(decoder.bitstream_buf, decoder.bitstream_buf);
/*decoder.bitstream_buf = *(u64*)dword;
BigEndian64(decoder.bitstream_buf, decoder.bitstream_buf);*/
decoder->bitstream_buf = (dword[0] << 24) | (dword[1] << 16) |
(dword[2] << 8) |dword[3];
return 1;
}
/* make sure that there are at least 16 valid bits in bit_buf */
/* When `bits` is positive, GETWORD is called with the address of bit_buf and
 * the current `bits`, and `bits` is then lowered by 16. The do { } while (0)
 * wrapper makes the expansion behave as one statement.
 * NOTE(review): the bit_ptr argument is never used in the expansion. */
#define NEEDBITS(bit_buf,bits,bit_ptr) \
do { \
if (bits > 0) { \
GETWORD(&bit_buf,bits); \
bits -= 16; \
} \
} while (0)
/* remove num valid bits from bit_buf */
/* Shifts bit_buf left by `num` and adds `num` to the `bits` counter. The
 * do { } while (0) wrapper makes the expansion behave as one statement; the
 * IPU_LOG line inside is disabled tracing. */
#define DUMPBITS(bit_buf,bits,num) \
do { \
/*IPU_LOG("DUMPBITS %d\n",num);*/ \
bit_buf <<= (num); \
bits += (num); \
} while (0)
/*
 * Discard `num` bits from the global decoder's working set: the bit counter
 * grows by `num` and the buffer is shifted left by the same amount.
 */
static __forceinline void DUMPBITS(int num)
{
	decoder.bitstream_bits += num;
	decoder.bitstream_buf = decoder.bitstream_buf << num;
}
/* take num bits from the high part of bit_buf and zero extend them */
#define UBITS(bit_buf,num) (((u32)(bit_buf)) >> (32 - (num)))
#define UBITS(num) (((u32)decoder.bitstream_buf) >> (32 - (num)))
/* take num bits from the high part of bit_buf and sign extend them */
#define SBITS(bit_buf,num) (((s32)(bit_buf)) >> (32 - (num)))
#define SBITS(num) (((s32)decoder.bitstream_buf) >> (32 - (num)))
/*
 * Get bits from bitstream.
 *
 * Reads the top `num` bits of decoder.bitstream_buf as an unsigned value
 * (UBITS) and then consumes them (DUMPBITS).
 *
 * The temporary is u32 to match the return type: a u16 temporary silently
 * dropped the upper bits whenever more than 16 bits were requested.
 * NOTE(review): num == 0 makes UBITS shift a 32-bit value by 32 — callers
 * presumably never request zero bits; confirm.
 */
static __forceinline u32 GETBITS(int num)
{
	u32 retVal = UBITS(num);
	DUMPBITS(num);
	return retVal;
}
struct MBtab {
u8 modes;
@ -443,4 +458,247 @@ static const MBAtab MBA_11 [] = {
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}
};
// New
/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx
 * 16 entries, presumably indexed by the leading 5 bits of the code; the two
 * {0,0} entries fill the indexes below the first listed code (00010).
 * Entries look like {increment, code length in bits} — TODO confirm against MBAtab.
 * NOTE(review): not declared const, unlike the DC/DCT tables in this file. */
static MBAtab MBAtab1[16] =
{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4},
{3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
};
/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx
 * 104 entries: the 11-bit values 24 (00000011000) through 127 (00001111111),
 * so presumably indexed by (11-bit code - 24) — confirm in the lookup code.
 * Shorter codes are repeated so every 11-bit prefix lands on its entry.
 * NOTE(review): not declared const, unlike the DC/DCT tables in this file. */
static MBAtab MBAtab2[104] =
{
{33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
{25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
{19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
{15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8},
{14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8},
{13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8},
{12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8},
{11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8},
{10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8},
{9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
{9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7},
{8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7},
{8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}
};
/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110
 * 32 entries, one per 5-bit prefix. Entries look like {dc size, code length in
 * bits} — TODO confirm against DCtab. The final {0, 0} sits at prefix 11111,
 * which is outside the code range listed here. */
static const DCtab DClumtab0[32] =
{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
{4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0}
};
/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111
 * 16 entries: the 9-bit values 0x1F0 through 0x1FF, so presumably indexed by
 * the low 4 bits of the 9-bit prefix — confirm in the lookup code. */
static const DCtab DClumtab1[16] =
{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
};
/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110
 * 32 entries, one per 5-bit prefix. Entries look like {dc size, code length in
 * bits} — TODO confirm against DCtab. The final {0, 0} sits at prefix 11111,
 * which is outside the code range listed here. */
static const DCtab DCchromtab0[32] =
{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0}
};
/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111
 * 32 entries: the 10-bit values 0x3E0 through 0x3FF, so presumably indexed by
 * the low 5 bits of the 10-bit prefix — confirm in the lookup code. */
static const DCtab DCchromtab1[32] =
{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
{8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
};
/* Table B-14, DCT coefficients table zero,
 * codes 0100 ... 1xxx (used for first (DC) coefficient)
 * 12 entries: the 4-bit values 4 through 15, so presumably indexed by
 * (4-bit prefix - 4). Entries look like {run, level, code length in bits} —
 * TODO confirm against DCTtab.
 */
static const DCTtab DCTtabfirst[12] =
{
{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
};
/* Table B-14, DCT coefficients table zero,
 * codes 0100 ... 1xxx (used for all other coefficients)
 * 12 entries: the 4-bit values 4 through 15, so presumably indexed by
 * (4-bit prefix - 4). The entries marked EOB carry 64 in their first field.
 */
static const DCTtab DCTtabnext[12] =
{
{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}
};
/* Table B-14, DCT coefficients table zero,
 * codes 000001xx ... 00111xxx
 * 60 entries: the 8-bit values 4 through 63, so presumably indexed by
 * (8-bit prefix - 4). The entries marked Escape carry 65 in their first field.
 */
static const DCTtab DCTtab0[60] =
{
{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
{0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
{7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
{6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
{1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
{13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
{3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
};
/* Table B-15, DCT coefficients table one,
 * codes 000001xx ... 11111111
 * 252 entries: the 8-bit values 4 through 255, so presumably indexed by
 * (8-bit prefix - 4). Escape entries carry 65 and EOB entries carry 64 in
 * their first field.
 */
static const DCTtab DCTtab0a[252] =
{
{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
{6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
{0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
{0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
{4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
{1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
{13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
{9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
{10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
{0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
{2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
};
/* Table B-14, DCT coefficients table zero,
 * codes 0000001000 ... 0000001111
 * 8 entries: the 10-bit values 8 through 15, so presumably indexed by
 * (10-bit prefix - 8); every entry's last field is 10.
 */
static const DCTtab DCTtab1[8] =
{
{16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
{1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
};
/* Table B-15, DCT coefficients table one,
 * codes 000000100x ... 000000111x
 * 8 entries: the 10-bit values 8 through 15, so presumably indexed by
 * (10-bit prefix - 8). Entries whose last field is 9 appear twice, once for
 * each value of the trailing bit.
 */
static const DCTtab DCTtab1a[8] =
{
{5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
{2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
};
/* Table B-14/15, DCT coefficients table zero / one,
 * codes 000000010000 ... 000000011111
 * 16 entries: the 12-bit values 16 through 31, so presumably indexed by
 * (12-bit prefix - 16); every entry's last field is 12.
 */
static const DCTtab DCTtab2[16] =
{
{0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
{2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
{0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
{3,3,12}, {0,8,12}, {6,2,12}, {17,1,12}
};
/* Table B-14/15, DCT coefficients table zero / one,
 * codes 0000000010000 ... 0000000011111
 * 16 entries: the 13-bit values 16 through 31, so presumably indexed by
 * (13-bit prefix - 16); every entry's last field is 13.
 */
static const DCTtab DCTtab3[16] =
{
{10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
{2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
{0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
{25,1,13}, {24,1,13}, {23,1,13}, {22,1,13}
};
/* Table B-14/15, DCT coefficients table zero / one,
 * codes 00000000010000 ... 00000000011111
 * 16 entries: the 14-bit values 16 through 31, so presumably indexed by
 * (14-bit prefix - 16); every entry's last field is 14.
 */
static const DCTtab DCTtab4[16] =
{
{0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
{0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
{0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
{0,19,14}, {0,18,14}, {0,17,14}, {0,16,14}
};
/* Table B-14/15, DCT coefficients table zero / one,
 * codes 000000000010000 ... 000000000011111
 * 16 entries: the 15-bit values 16 through 31, so presumably indexed by
 * (15-bit prefix - 16); every entry's last field is 15.
 */
static const DCTtab DCTtab5[16] =
{
{0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
{0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
{0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
{1,11,15}, {1,10,15}, {1,9,15}, {1,8,15}
};
/* Table B-14/15, DCT coefficients table zero / one,
 * codes 0000000000010000 ... 0000000000011111
 * 16 entries: the 16-bit values 16 through 31, so presumably indexed by
 * (16-bit prefix - 16); every entry's last field is 16.
 */
static const DCTtab DCTtab6[16] =
{
{1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
{6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
{13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
{30,1,16}, {29,1,16}, {28,1,16}, {27,1,16}
};
#endif//__VLC_H__

View File

@ -17,17 +17,6 @@
#ifndef NAKED_ASM_H
#define NAKED_ASM_H
#include "IPU/coroutine.h"
// Common to Windows and Linux
extern "C"
{
// acoroutine.S
void so_call(coroutine_t coro);
void so_resume(void);
void so_exit(void);
}
#ifdef __LINUX__
extern "C"

View File

@ -1254,14 +1254,6 @@
<Filter
Name="IPU"
>
<File
RelativePath="..\..\IPU\coroutine.cpp"
>
</File>
<File
RelativePath="..\..\IPU\coroutine.h"
>
</File>
<File
RelativePath="..\..\Ipu\IPU.cpp"
>
@ -1270,7 +1262,7 @@
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
UsePrecompiledHeader="2"
/>
</FileConfiguration>
<FileConfiguration
@ -1302,7 +1294,7 @@
>
<Tool
Name="VCCLCompilerTool"
UsePrecompiledHeader="0"
UsePrecompiledHeader="2"
/>
</FileConfiguration>
<FileConfiguration