IPU: Various minor header file, table, and inline function tweaks/cleanups. Note that I unified several tables into structs and applied __aligned16 to them. I'm not just being silly: this seems to have a noticeable positive effect on framerates (~3-4% here).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3573 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-26 18:14:56 +00:00
parent 7074d31651
commit 4bb830827d
7 changed files with 419 additions and 366 deletions

View File

@ -24,6 +24,8 @@
#include "IPU.h"
#include "yuv2rgb.h"
#include "mpeg2lib/Mpeg.h"
#include "Vif.h"
#include "Gif.h"
#include "Vif_Dma.h"
@ -50,7 +52,7 @@ u8* g_pIPU0Pointer = NULL;
void ReorderBitstream();
// the BP doesn't advance and returns -1 if there is no data to be read
tIPU_BP g_BP;
__aligned16 tIPU_BP g_BP;
void IPUWorker();
@ -65,6 +67,7 @@ static u8 iq[64]; //intraquant matrix
u16 vqclut[16]; //clut conversion table
static u8 s_thresh[2]; //thresholds for color conversions
int coded_block_pattern = 0;
__aligned16 macroblock_8 mb8;
__aligned16 macroblock_16 mb16;
__aligned16 macroblock_rgb32 rgb32;
@ -73,8 +76,7 @@ __aligned16 macroblock_rgb16 rgb16;
u8 indx4[16*16/2];
bool mpeg2_inited = false; //mpeg2_idct_init() must be called only once
u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'};
decoder_t decoder; //static, only to place it in bss
decoder_t tempdec;
__aligned16 decoder_t decoder; //static, only to place it in bss
extern "C"
{
@ -96,10 +98,6 @@ void init_g_decoder()
decoder.intra_quantizer_matrix = (u8*)iq;
decoder.non_intra_quantizer_matrix = (u8*)niq;
decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
decoder.mb8 = &mb8;
decoder.mb16 = &mb16;
decoder.rgb32 = &rgb32;
decoder.rgb16 = &rgb16;
decoder.stride = 16;
}
@ -428,8 +426,8 @@ static __forceinline BOOL ipuBDEC(u32 val, bool resume)
decoder.dcr = bdec.DCR;
decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
memzero(mb8);
memzero(mb16);
memzero_sse_a(mb8);
memzero_sse_a(mb16);
}
return mpeg2_slice();
@ -595,8 +593,8 @@ static BOOL __fastcall ipuCSC(u32 val)
if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
ipu_csc(&mb8, &rgb32, 0);
if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE);
ipu_csc(mb8, rgb32, 0);
if (csc.OFM) ipu_dither(rgb32, rgb16, csc.DTE);
if (csc.OFM)
{
@ -637,10 +635,10 @@ static BOOL ipuPACK(u32 val)
if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
}
ipu_csc(&mb8, &rgb32, 0);
ipu_dither(&rgb32, &rgb16, csc.DTE);
ipu_csc(mb8, rgb32, 0);
ipu_dither(rgb32, rgb16, csc.DTE);
if (csc.OFM) ipu_vq(&rgb16, indx4);
if (csc.OFM) ipu_vq(rgb16, indx4);
if (csc.OFM)
{
@ -1117,10 +1115,10 @@ u8 __fastcall getBits8(u8 *address, u32 advance)
void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V,
const int Src_BpS, const int Width, const int Height);
void __fastcall ipu_csc(macroblock_8 *mb8, macroblock_rgb32 *rgb32, int sgn)
__forceinline void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
{
int i;
u8* p = (u8*)rgb32;
u8* p = (u8*)&rgb32;
yuv2rgb();
@ -1151,30 +1149,30 @@ void __fastcall ipu_csc(macroblock_8 *mb8, macroblock_rgb32 *rgb32, int sgn)
}
}
void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb16, int dte)
__forceinline void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte)
{
int i, j;
for (i = 0; i < 16; ++i)
{
for (j = 0; j < 16; ++j)
{
rgb16->c[i][j].r = rgb32->c[i][j].r >> 3;
rgb16->c[i][j].g = rgb32->c[i][j].g >> 3;
rgb16->c[i][j].b = rgb32->c[i][j].b >> 3;
rgb16->c[i][j].a = rgb32->c[i][j].a == 0x40;
rgb16.c[i][j].r = rgb32.c[i][j].r >> 3;
rgb16.c[i][j].g = rgb32.c[i][j].g >> 3;
rgb16.c[i][j].b = rgb32.c[i][j].b >> 3;
rgb16.c[i][j].a = rgb32.c[i][j].a == 0x40;
}
}
}
void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4)
__forceinline void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
{
Console.Error("IPU: VQ not implemented");
}
void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16)
__forceinline void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16)
{
const u8 *s = (const u8*)mb8;
s16 *d = (s16*)mb16;
const u8 *s = (const u8*)&mb8;
s16 *d = (s16*)&mb16;
int i;
for (i = 0; i < 256; i++) *d++ = *s++; //Y bias - 16
for (i = 0; i < 64; i++) *d++ = *s++; //Cr bias - 128

View File

@ -16,7 +16,6 @@
#ifndef __IPU_H__
#define __IPU_H__
#include "mpeg2lib/Mpeg.h"
#include "IPU_Fifo.h"
#ifdef _MSC_VER
@ -342,16 +341,11 @@ struct tIPU_cmd
};
extern tIPU_cmd ipu_cmd;
extern tIPU_BP g_BP;
extern int coded_block_pattern;
extern int g_nIPU0Data; // or 0x80000000 whenever transferring
extern u8* g_pIPU0Pointer;
extern IPUStatus IPU1Status;
extern tIPU_DMA g_nDMATransfer;
// The IPU can only do one task at once and never uses other buffers so these
// should be made available to functions in other modules to save registers.
extern __aligned16 macroblock_rgb32 rgb32;
extern __aligned16 macroblock_8 mb8;
extern int ipuInit();
extern void ipuReset();

View File

@ -15,8 +15,9 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "IPU_Fifo.h"
#include "IPU.h"
#include "mpeg2lib/Mpeg.h"
IPU_Fifo ipu_fifo;

View File

@ -243,9 +243,9 @@ int get_macroblock_address_increment()
u16 code = UBITS(16);
if (code >= 4096)
mba = MBA_5 + (UBITS(5) - 2);
mba = MBA.mba5 + (UBITS(5) - 2);
else if (code >= 768)
mba = MBA_11 + (UBITS(11) - 24);
mba = MBA.mba11 + (UBITS(11) - 24);
else switch (UBITS(11))
{
@ -277,16 +277,16 @@ static __forceinline int get_luma_dc_dct_diff()
if (code < 31)
{
size = DClumtab0[code].size;
DUMPBITS(DClumtab0[code].len);
size = DCtable.lum0[code].size;
DUMPBITS(DCtable.lum0[code].len);
// 5 bits max
}
else
{
code = UBITS(9) - 0x1f0;
size = DClumtab1[code].size;
DUMPBITS(DClumtab1[code].len);
size = DCtable.lum1[code].size;
DUMPBITS(DCtable.lum1[code].len);
// 9 bits max
}
@ -313,14 +313,14 @@ static __forceinline int get_chroma_dc_dct_diff()
if (code<31)
{
size = DCchromtab0[code].size;
DUMPBITS(DCchromtab0[code].len);
size = DCtable.chrom0[code].size;
DUMPBITS(DCtable.chrom0[code].len);
}
else
{
code = UBITS(10) - 0x3e0;
size = DCchromtab1[code].size;
DUMPBITS(DCchromtab1[code].len);
size = DCtable.chrom1[code].size;
DUMPBITS(DCtable.chrom1[code].len);
}
if (size==0)
@ -371,49 +371,55 @@ static __forceinline bool get_intra_block()
if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
{
tab = &DCTtabnext[(code >> 12) - 4];
tab = &DCT.next[(code >> 12) - 4];
}
else if (code >= 1024)
{
if (decoder.intra_vlc_format && !decoder.mpeg1)
{
tab = &DCTtab0a[(code >> 8) - 4];
}
else
{
tab = &DCTtab0[(code >> 8) - 4];
}
if (decoder.intra_vlc_format && !decoder.mpeg1)
{
tab = &DCT.tab0a[(code >> 8) - 4];
}
else
{
tab = &DCT.tab0[(code >> 8) - 4];
}
}
else if (code >= 512)
{
if (decoder.intra_vlc_format && !decoder.mpeg1)
{
tab = &DCTtab1a[(code >> 6) - 8];
}
else
{
tab = &DCTtab1[(code >> 6) - 8];
}
if (decoder.intra_vlc_format && !decoder.mpeg1)
{
tab = &DCT.tab1a[(code >> 6) - 8];
}
else
{
tab = &DCT.tab1[(code >> 6) - 8];
}
}
// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
// that should use a single unrolled DCT table instead of five separate tables used
// here. Multiple conditional statements are very slow, while modern CPU data caches
// have lots of room to spare.
else if (code >= 256)
{
tab = &DCTtab2[(code >> 4) - 16];
tab = &DCT.tab2[(code >> 4) - 16];
}
else if (code >= 128)
{
tab = &DCTtab3[(code >> 3) - 16];
tab = &DCT.tab3[(code >> 3) - 16];
}
else if (code >= 64)
{
tab = &DCTtab4[(code >> 2) - 16];
tab = &DCT.tab4[(code >> 2) - 16];
}
else if (code >= 32)
{
tab = &DCTtab5[(code >> 1) - 16];
tab = &DCT.tab5[(code >> 1) - 16];
}
else if (code >= 16)
{
tab = &DCTtab6[code - 16];
tab = &DCT.tab6[code - 16];
}
else
{
@ -519,40 +525,46 @@ static __forceinline bool get_non_intra_block(int * last)
{
if (i==0)
{
tab = &DCTtabfirst[(code >> 12) - 4];
tab = &DCT.first[(code >> 12) - 4];
}
else
{
tab = &DCTtabnext[(code >> 12)- 4];
tab = &DCT.next[(code >> 12)- 4];
}
}
else if (code >= 1024)
{
tab = &DCTtab0[(code >> 8) - 4];
tab = &DCT.tab0[(code >> 8) - 4];
}
else if (code >= 512)
{
tab = &DCTtab1[(code >> 6) - 8];
tab = &DCT.tab1[(code >> 6) - 8];
}
// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
// that should use a single unrolled DCT table instead of five separate tables used
// here. Multiple conditional statements are very slow, while modern CPU data caches
// have lots of room to spare.
else if (code >= 256)
{
tab = &DCTtab2[(code >> 4) - 16];
tab = &DCT.tab2[(code >> 4) - 16];
}
else if (code >= 128)
{
tab = &DCTtab3[(code >> 3) - 16];
tab = &DCT.tab3[(code >> 3) - 16];
}
else if (code >= 64)
{
tab = &DCTtab4[(code >> 2) - 16];
tab = &DCT.tab4[(code >> 2) - 16];
}
else if (code >= 32)
{
tab = &DCTtab5[(code >> 1) - 16];
tab = &DCT.tab5[(code >> 1) - 16];
}
else if (code >= 16)
{
tab = &DCTtab6[code - 16];
tab = &DCT.tab6[code - 16];
}
else
{
@ -625,7 +637,7 @@ static __forceinline bool get_non_intra_block(int * last)
return true;
}
static bool __fastcall slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
static __forceinline bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
{
if (!skip || ipu_cmd.pos[3])
{
@ -655,13 +667,13 @@ static bool __fastcall slice_intra_DCT(const int cc, u8 * const dest, const int
return true;
}
static bool __fastcall slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
static __forceinline bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
{
int last;
if (!skip)
{
memzero(decoder.DCTblock);
memzero_sse_a(decoder.DCTblock);
}
if (!get_non_intra_block(&last))
@ -735,8 +747,8 @@ bool mpeg2sliceIDEC()
}
decoder.coded_block_pattern = 0x3F;//all 6 blocks
memzero(*decoder.mb8);
memzero(*decoder.rgb32);
memzero_sse_a(mb8);
memzero_sse_a(rgb32);
case 1:
ipu_cmd.pos[1] = 1;
@ -756,37 +768,37 @@ bool mpeg2sliceIDEC()
{
case 0:
case 1:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[2] == 1))
if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
{
ipu_cmd.pos[2] = 1;
return false;
}
case 2:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
{
ipu_cmd.pos[2] = 2;
return false;
}
case 3:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
{
ipu_cmd.pos[2] = 3;
return false;
}
case 4:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
{
ipu_cmd.pos[2] = 4;
return false;
}
case 5:
if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[2] == 5))
if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder.stride >> 1, ipu_cmd.pos[2] == 5))
{
ipu_cmd.pos[2] = 5;
return false;
}
case 6:
if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[2] == 6))
if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder.stride >> 1, ipu_cmd.pos[2] == 6))
{
ipu_cmd.pos[2] = 6;
return false;
@ -794,19 +806,19 @@ bool mpeg2sliceIDEC()
}
// Send The MacroBlock via DmaIpuFrom
ipu_csc(decoder.mb8, decoder.rgb32, decoder.sgn);
ipu_csc(mb8, rgb32, decoder.sgn);
if (decoder.ofm == 0)
{
g_nIPU0Data = 64;
g_pIPU0Pointer = (u8*)decoder.rgb32;
g_pIPU0Pointer = (u8*)&rgb32;
}
else
{
ipu_dither(decoder.rgb32, decoder.rgb16, decoder.dte);
ipu_dither(rgb32, rgb16, decoder.dte);
g_nIPU0Data = 32;
g_pIPU0Pointer = (u8*)decoder.rgb16;
g_pIPU0Pointer = (u8*)&rgb16;
}
case 2:
@ -841,12 +853,12 @@ bool mpeg2sliceIDEC()
code = UBITS(16);
if (code >= 0x1000)
{
mba = MBA_5 + (UBITS(5) - 2);
mba = MBA.mba5 + (UBITS(5) - 2);
break;
}
else if (code >= 0x0300)
{
mba = MBA_11 + (UBITS(11) - 24);
mba = MBA.mba11 + (UBITS(11) - 24);
break;
}
else switch (UBITS(11))
@ -942,8 +954,8 @@ bool mpeg2_slice()
ipuRegs->ctrl.ECD = 0;
ipuRegs->top = 0;
memzero(*decoder.mb8);
memzero(*decoder.mb16);
memzero_sse_a(mb8);
memzero_sse_a(mb16);
case 1:
if (!bitstream_init())
{
@ -972,37 +984,37 @@ bool mpeg2_slice()
case 0:
decoder.coded_block_pattern = 0x3F;
case 1:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[1] == 1))
if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
{
ipu_cmd.pos[1] = 1;
return false;
}
case 2:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
{
ipu_cmd.pos[1] = 2;
return false;
}
case 3:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
{
ipu_cmd.pos[1] = 3;
return false;
}
case 4:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
{
ipu_cmd.pos[1] = 4;
return false;
}
case 5:
if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
{
ipu_cmd.pos[1] = 5;
return false;
}
case 6:
if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
{
ipu_cmd.pos[1] = 6;
return false;
@ -1010,7 +1022,7 @@ bool mpeg2_slice()
break;
}
ipu_copy(decoder.mb8, decoder.mb16);
ipu_copy(mb8, mb16);
}
else
{
@ -1023,7 +1035,7 @@ bool mpeg2_slice()
case 1:
if (decoder.coded_block_pattern & 0x20)
{
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y, DCT_stride, ipu_cmd.pos[1] == 1))
if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
{
ipu_cmd.pos[1] = 1;
return false;
@ -1032,7 +1044,7 @@ bool mpeg2_slice()
case 2:
if (decoder.coded_block_pattern & 0x10)
{
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
{
ipu_cmd.pos[1] = 2;
return false;
@ -1041,7 +1053,7 @@ bool mpeg2_slice()
case 3:
if (decoder.coded_block_pattern & 0x08)
{
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
{
ipu_cmd.pos[1] = 3;
return false;
@ -1050,7 +1062,7 @@ bool mpeg2_slice()
case 4:
if (decoder.coded_block_pattern & 0x04)
{
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
{
ipu_cmd.pos[1] = 4;
return false;
@ -1059,7 +1071,7 @@ bool mpeg2_slice()
case 5:
if (decoder.coded_block_pattern & 0x2)
{
if (!slice_non_intra_DCT((s16*)decoder.mb16->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
{
ipu_cmd.pos[1] = 5;
return false;
@ -1068,7 +1080,7 @@ bool mpeg2_slice()
case 6:
if (decoder.coded_block_pattern & 0x1)
{
if (!slice_non_intra_DCT((s16*)decoder.mb16->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
{
ipu_cmd.pos[1] = 6;
return false;
@ -1098,7 +1110,7 @@ bool mpeg2_slice()
decoder.mbc = 1;
g_nIPU0Data = 48;
g_pIPU0Pointer = (u8*)decoder.mb16;
g_pIPU0Pointer = (u8*)&mb16;
case 3:
while (g_nIPU0Data > 0)

View File

@ -22,8 +22,50 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __MPEG_H__
#define __MPEG_H__
#pragma once
#include <xmmintrin.h>
// Zero-fills `dest` using aligned 128-bit SSE stores.
//
// Requirements:
//   * sizeof(dest) must be a multiple of 16 bytes (checked at compile time by C_ASSERT).
//   * `dest` must be 16-byte aligned, since _mm_store_ps is the aligned-store intrinsic.
//
// The object is cleared in two phases: first the (qwc % 8) leading quadwords via a
// fall-through switch, then the remaining quadwords eight at a time.
template< typename T >
__noinline void memzero_sse_a( T& dest )
{
#define MZFqwc (sizeof(dest)/16)

	C_ASSERT( (sizeof(dest) & 0xf) == 0 );

	__m128 zeroreg = _mm_setzero_ps();

	float (*destxmm)[4] = (float(*)[4])&dest;

	// Case N clears quadword N-1, so entering at the remainder R and falling through
	// clears quadwords R-1 .. 0.  (Indexing with `idx` directly would clear R .. 1:
	// quadword 0 would be left untouched and quadword R could lie past the end of a
	// small object.)
#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[(idx)-1][0], zeroreg)

	switch( MZFqwc & 0x07 )
	{
		StoreDestIdx(0x07);
		StoreDestIdx(0x06);
		StoreDestIdx(0x05);
		StoreDestIdx(0x04);
		StoreDestIdx(0x03);
		StoreDestIdx(0x02);
		StoreDestIdx(0x01);
	}

#undef StoreDestIdx

	// Skip past the quadwords handled by the switch above.
	destxmm += (MZFqwc & 0x07);

	// Clear the rest in unrolled groups of eight quadwords (128 bytes per iteration).
	for( uint i=0; i<MZFqwc / 8; ++i, destxmm+=8 )
	{
		_mm_store_ps(&destxmm[0][0], zeroreg);
		_mm_store_ps(&destxmm[1][0], zeroreg);
		_mm_store_ps(&destxmm[2][0], zeroreg);
		_mm_store_ps(&destxmm[3][0], zeroreg);
		_mm_store_ps(&destxmm[4][0], zeroreg);
		_mm_store_ps(&destxmm[5][0], zeroreg);
		_mm_store_ps(&destxmm[6][0], zeroreg);
		_mm_store_ps(&destxmm[7][0], zeroreg);
	}

#undef MZFqwc
}
enum macroblock_modes
{
@ -81,12 +123,12 @@ struct macroblock_rgb32{
} c[16][16];
};
struct rgb16{
struct rgb16_t{
unsigned short r:5, g:5, b:5, a:1;
};
struct macroblock_rgb16{
struct rgb16 c[16][16];
rgb16_t c[16][16];
};
struct decoder_t {
@ -100,11 +142,6 @@ struct decoder_t {
u32 bitstream_buf; /* current 32 bit working set */
int bitstream_bits; /* used bits in working set */
struct macroblock_8 *mb8;
struct macroblock_16 *mb16;
struct macroblock_rgb32 *rgb32;
struct macroblock_rgb16 *rgb16;
int stride;
/* predictor for DC coefficients in intra blocks */
@ -172,25 +209,24 @@ extern void (__fastcall *mpeg2_idct_add) (int last, s16 * block, s16* dest, int
#define IDEC 0
#define BDEC 1
bool mpeg2sliceIDEC();
bool mpeg2_slice();
int get_macroblock_address_increment();
int get_macroblock_modes();
extern bool mpeg2sliceIDEC();
extern bool mpeg2_slice();
extern int get_macroblock_address_increment();
extern int get_macroblock_modes();
extern int get_motion_delta(const int f_code);
extern int get_dmv();
extern int non_linear_quantizer_scale[];
extern decoder_t g_decoder;
void __fastcall ipu_csc(macroblock_8 *mb8, macroblock_rgb32 *rgb32, int sgn);
void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb16, int dte);
void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4);
void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16);
extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
extern void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16);
int slice (u8 * buffer);
extern int slice (u8 * buffer);
/* idct.c */
void mpeg2_idct_init ();
extern void mpeg2_idct_init ();
#ifdef _MSC_VER
#define BigEndian(out, in) out = _byteswap_ulong(in)
@ -204,4 +240,13 @@ void mpeg2_idct_init ();
#define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap...
#endif
#endif//__MPEG_H__
// The IPU can only do one task at once and never uses other buffers so all mpeg state variables
// are made available to mpeg/vlc modules as globals here:
extern __aligned16 tIPU_BP g_BP;
extern __aligned16 decoder_t decoder;
extern __aligned16 macroblock_8 mb8;
extern __aligned16 macroblock_16 mb16;
extern __aligned16 macroblock_rgb32 rgb32;
extern __aligned16 macroblock_rgb16 rgb16;

View File

@ -21,6 +21,11 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
// WARNING! This file should only be included into Mpeg.cpp AND NOWHERE ELSE.
// All contents of this file are used only by Mpeg.cpp, and including it elsewhere will
// just result in the linker having to remove a whole lot of redundant/unused decoder
// tables and static functions. -- air
#ifndef __VLC_H__
#define __VLC_H__
@ -29,9 +34,6 @@ static u8 data[2];
//static u8 word[4];
//static u8 dword[8];
//static u8 qword[16];
extern tIPU_BP g_BP;
extern decoder_t decoder;
extern void ReorderBitstream();
static __forceinline int GETWORD()
{
@ -137,7 +139,7 @@ static const MBtab MB_I [] = {
#define MC MACROBLOCK_MOTION_FORWARD
#define CODED MACROBLOCK_PATTERN
static const MBtab MB_P [] = {
static const __aligned16 MBtab MB_P [] = {
{INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5},
{MC, 3}, {MC, 3}, {MC, 3}, {MC, 3},
{CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2},
@ -152,7 +154,7 @@ static const MBtab MB_P [] = {
#define BWD MACROBLOCK_MOTION_BACKWARD
#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
static const MBtab MB_B [] = {
static const __aligned16 MBtab MB_B [] = {
{0, 0}, {INTRA|QUANT, 6},
{BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6},
{INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
@ -186,7 +188,7 @@ static const MVtab MV_4 [] = {
{ 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
};
static const MVtab MV_10 [] = {
static const __aligned16 MVtab MV_10 [] = {
{ 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
{ 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
{11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
@ -201,7 +203,7 @@ static const DMVtab DMV_2 [] = {
};
static const CBPtab CBP_7 [] = {
static const __aligned16 CBPtab CBP_7 [] = {
{0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
{0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
{0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
@ -232,7 +234,7 @@ static const CBPtab CBP_7 [] = {
{0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
};
static const CBPtab CBP_9 [] = {
static const __aligned16 CBPtab CBP_9 [] = {
{0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
{0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
{0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
@ -251,6 +253,8 @@ static const CBPtab CBP_9 [] = {
{0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
};
#if 0 // following tables are unused by PCSX2
static const DCtab DC_lum_5 [] = {
{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
@ -272,7 +276,6 @@ static const DCtab DC_long [] = {
{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
};
static const DCTtab DCT_16 [] = {
{129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
{129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
@ -421,47 +424,54 @@ static const DCTtab DCT_B15_8 [] = {
{ 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8},
{ 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8}
};
#endif
static const MBAtab MBA_5 [] = {
{6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
{2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
// Groups the two macroblock-address-increment lookup tables into a single struct so
// that both live in one object (the instance `MBA` is declared __aligned16).
struct MBAtabSet
{
MBAtab mba5[30];		// indexed by the decoder as mba5[UBITS(5) - 2]
MBAtab mba11[26*4];		// indexed by the decoder as mba11[UBITS(11) - 24]
};
static const __aligned16 MBAtabSet MBA = {
{ // mba5
{6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
{2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
},
static const MBAtab MBA_11 [] = {
{32, 11}, {31, 11}, {30, 11}, {29, 11},
{28, 11}, {27, 11}, {26, 11}, {25, 11},
{24, 11}, {23, 11}, {22, 11}, {21, 11},
{20, 10}, {20, 10}, {19, 10}, {19, 10},
{18, 10}, {18, 10}, {17, 10}, {17, 10},
{16, 10}, {16, 10}, {15, 10}, {15, 10},
{14, 8}, {14, 8}, {14, 8}, {14, 8},
{14, 8}, {14, 8}, {14, 8}, {14, 8},
{13, 8}, {13, 8}, {13, 8}, {13, 8},
{13, 8}, {13, 8}, {13, 8}, {13, 8},
{12, 8}, {12, 8}, {12, 8}, {12, 8},
{12, 8}, {12, 8}, {12, 8}, {12, 8},
{11, 8}, {11, 8}, {11, 8}, {11, 8},
{11, 8}, {11, 8}, {11, 8}, {11, 8},
{10, 8}, {10, 8}, {10, 8}, {10, 8},
{10, 8}, {10, 8}, {10, 8}, {10, 8},
{ 9, 8}, { 9, 8}, { 9, 8}, { 9, 8},
{ 9, 8}, { 9, 8}, { 9, 8}, { 9, 8},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}
{ // mba11
{32, 11}, {31, 11}, {30, 11}, {29, 11},
{28, 11}, {27, 11}, {26, 11}, {25, 11},
{24, 11}, {23, 11}, {22, 11}, {21, 11},
{20, 10}, {20, 10}, {19, 10}, {19, 10},
{18, 10}, {18, 10}, {17, 10}, {17, 10},
{16, 10}, {16, 10}, {15, 10}, {15, 10},
{14, 8}, {14, 8}, {14, 8}, {14, 8},
{14, 8}, {14, 8}, {14, 8}, {14, 8},
{13, 8}, {13, 8}, {13, 8}, {13, 8},
{13, 8}, {13, 8}, {13, 8}, {13, 8},
{12, 8}, {12, 8}, {12, 8}, {12, 8},
{12, 8}, {12, 8}, {12, 8}, {12, 8},
{11, 8}, {11, 8}, {11, 8}, {11, 8},
{11, 8}, {11, 8}, {11, 8}, {11, 8},
{10, 8}, {10, 8}, {10, 8}, {10, 8},
{10, 8}, {10, 8}, {10, 8}, {10, 8},
{ 9, 8}, { 9, 8}, { 9, 8}, { 9, 8},
{ 9, 8}, { 9, 8}, { 9, 8}, { 9, 8},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 8, 7}, { 8, 7}, { 8, 7}, { 8, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}
}
};
// New
#if 0 // Not used by PCSX2
/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */
static MBAtab MBAtab1[16] =
{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4},
@ -485,220 +495,212 @@ static MBAtab MBAtab2[104] =
{8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7},
{8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}
};
#endif
/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
static const DCtab DClumtab0[32] =
{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
{4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0}
};
/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
static const DCtab DClumtab1[16] =
{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9}
};
/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
static const DCtab DCchromtab0[32] =
{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0}
};
/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
static const DCtab DCchromtab1[32] =
{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
{8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10}
};
/* Table B-14, DCT coefficients table zero,
* codes 0100 ... 1xxx (used for first (DC) coefficient)
*/
static const DCTtab DCTtabfirst[12] =
struct DCtabSet
{
{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}
DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110
DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111
DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110
DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111
};
/* Table B-14, DCT coefficients table zero,
* codes 0100 ... 1xxx (used for all other coefficients)
*/
static const DCTtab DCTtabnext[12] =
static const __aligned16 DCtabSet DCtable =
{
{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}
// lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
{4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} },
/* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} },
/* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} },
/* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
{8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} },
};
/* Table B-14, DCT coefficients table zero,
* codes 000001xx ... 00111xxx
*/
static const DCTtab DCTtab0[60] =
struct DCTtabSet
{
{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
{0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
{7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
{6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
{1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
{13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
{3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}
DCTtab first[12];
DCTtab next[12];
DCTtab tab0[60];
DCTtab tab0a[252];
DCTtab tab1[8];
DCTtab tab1a[8];
DCTtab tab2[16];
DCTtab tab3[16];
DCTtab tab4[16];
DCTtab tab5[16];
DCTtab tab6[16];
};
/* Table B-15, DCT coefficients table one,
* codes 000001xx ... 11111111
*/
static const DCTtab DCTtab0a[252] =
static const __aligned16 DCTtabSet DCT =
{
{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
{6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
{0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
{0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
{4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
{1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
{13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
{9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
{10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
{0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
{2,3,8}, {4,2,8}, {0,14,8}, {0,15,8}
};
/* first[12]: Table B-14, DCT coefficients table zero,
* codes 0100 ... 1xxx (used for first (DC) coefficient)
*/
{ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} },
/* DCTtab1: Table B-14, DCT coefficients table zero,
 * codes 0000001000 ... 0000001111
 *
 * Eight slots, one per 10-bit code, listed in ascending code order.
 * The third field of every entry equals the code length (10); the first
 * two are presumably {run, level} -- confirm against the DCTtab typedef.
 */
static const DCTtab DCTtab1[8] =
{
	{16, 1, 10},	/* 0000001000 */
	{ 5, 2, 10},	/* 0000001001 */
	{ 0, 7, 10},	/* 0000001010 */
	{ 2, 3, 10},	/* 0000001011 */
	{ 1, 4, 10},	/* 0000001100 */
	{15, 1, 10},	/* 0000001101 */
	{14, 1, 10},	/* 0000001110 */
	{ 4, 2, 10}	/* 0000001111 */
};
/* next[12]: Table B-14, DCT coefficients table zero,
* codes 0100 ... 1xxx (used for all other coefficients)
*/
{ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2} },
/* DCTtab1a: Table B-15, DCT coefficients table one,
 * codes 000000100x ... 000000111x
 *
 * Eight slots in ascending order of the 10-bit prefix.  Entries whose
 * third field is 9 appear twice in a row (the trailing 'x' bit is not
 * part of the code); entries whose third field is 10 occupy one slot.
 * The first two fields are presumably {run, level} -- confirm against
 * the DCTtab typedef.
 */
static const DCTtab DCTtab1a[8] =
{
	{ 5, 2,  9},	/* 000000100x (x = 0) */
	{ 5, 2,  9},	/* 000000100x (x = 1) */
	{14, 1,  9},	/* 000000101x (x = 0) */
	{14, 1,  9},	/* 000000101x (x = 1) */
	{ 2, 4, 10},	/* 0000001100 */
	{16, 1, 10},	/* 0000001101 */
	{15, 1,  9},	/* 000000111x (x = 0) */
	{15, 1,  9}	/* 000000111x (x = 1) */
};
/* tab0[60]: Table B-14, DCT coefficients table zero,
* codes 000001xx ... 00111xxx
*/
{ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
{0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
{7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
{6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
{1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
{13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
{3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
{0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} },
/* DCTtab2: Table B-14/15, DCT coefficients table zero / one,
 * codes 000000010000 ... 000000011111
 *
 * Sixteen slots, one per 12-bit code, listed in ascending code order.
 * The third field of every entry equals the code length (12); the first
 * two are presumably {run, level} -- confirm against the DCTtab typedef.
 */
static const DCTtab DCTtab2[16] =
{
	{ 0, 11, 12},	/* 000000010000 */
	{ 8,  2, 12},	/* 000000010001 */
	{ 4,  3, 12},	/* 000000010010 */
	{ 0, 10, 12},	/* 000000010011 */
	{ 2,  4, 12},	/* 000000010100 */
	{ 7,  2, 12},	/* 000000010101 */
	{21,  1, 12},	/* 000000010110 */
	{20,  1, 12},	/* 000000010111 */
	{ 0,  9, 12},	/* 000000011000 */
	{19,  1, 12},	/* 000000011001 */
	{18,  1, 12},	/* 000000011010 */
	{ 1,  5, 12},	/* 000000011011 */
	{ 3,  3, 12},	/* 000000011100 */
	{ 0,  8, 12},	/* 000000011101 */
	{ 6,  2, 12},	/* 000000011110 */
	{17,  1, 12}	/* 000000011111 */
};
/* tab0a[252]: Table B-15, DCT coefficients table one,
* codes 000001xx ... 11111111
*/
{ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
{6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
{0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
{0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
{4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
{5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
{1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
{13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
{2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
{1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
{0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
{0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
{9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
{10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
{0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
{2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} },
/* DCTtab3: Table B-14/15, DCT coefficients table zero / one,
 * codes 0000000010000 ... 0000000011111
 *
 * Sixteen slots, one per 13-bit code, listed in ascending code order.
 * The third field of every entry equals the code length (13); the first
 * two are presumably {run, level} -- confirm against the DCTtab typedef.
 */
static const DCTtab DCTtab3[16] =
{
	{10,  2, 13},	/* 0000000010000 */
	{ 9,  2, 13},	/* 0000000010001 */
	{ 5,  3, 13},	/* 0000000010010 */
	{ 3,  4, 13},	/* 0000000010011 */
	{ 2,  5, 13},	/* 0000000010100 */
	{ 1,  7, 13},	/* 0000000010101 */
	{ 1,  6, 13},	/* 0000000010110 */
	{ 0, 15, 13},	/* 0000000010111 */
	{ 0, 14, 13},	/* 0000000011000 */
	{ 0, 13, 13},	/* 0000000011001 */
	{ 0, 12, 13},	/* 0000000011010 */
	{26,  1, 13},	/* 0000000011011 */
	{25,  1, 13},	/* 0000000011100 */
	{24,  1, 13},	/* 0000000011101 */
	{23,  1, 13},	/* 0000000011110 */
	{22,  1, 13}	/* 0000000011111 */
};
/* Table B-14, DCT coefficients table zero,
* codes 0000001000 ... 0000001111
*/
{ {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
{1,4,10}, {15,1,10}, {14,1,10}, {4,2,10} },
/* DCTtab4: Table B-14/15, DCT coefficients table zero / one,
 * codes 00000000010000 ... 00000000011111
 *
 * Sixteen slots, one per 14-bit code, listed in ascending code order.
 * The first field is 0 throughout and the second counts down from 31
 * to 16; the third equals the code length (14).  The first two fields
 * are presumably {run, level} -- confirm against the DCTtab typedef.
 */
static const DCTtab DCTtab4[16] =
{
	{0, 31, 14},	/* 00000000010000 */
	{0, 30, 14},	/* 00000000010001 */
	{0, 29, 14},	/* 00000000010010 */
	{0, 28, 14},	/* 00000000010011 */
	{0, 27, 14},	/* 00000000010100 */
	{0, 26, 14},	/* 00000000010101 */
	{0, 25, 14},	/* 00000000010110 */
	{0, 24, 14},	/* 00000000010111 */
	{0, 23, 14},	/* 00000000011000 */
	{0, 22, 14},	/* 00000000011001 */
	{0, 21, 14},	/* 00000000011010 */
	{0, 20, 14},	/* 00000000011011 */
	{0, 19, 14},	/* 00000000011100 */
	{0, 18, 14},	/* 00000000011101 */
	{0, 17, 14},	/* 00000000011110 */
	{0, 16, 14}	/* 00000000011111 */
};
/* Table B-15, DCT coefficients table one,
* codes 000000100x ... 000000111x
*/
{ {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
{2,4,10}, {16,1,10}, {15,1,9}, {15,1,9} },
/* DCTtab5: Table B-14/15, DCT coefficients table zero / one,
 * codes 000000000010000 ... 000000000011111
 *
 * Sixteen slots, one per 15-bit code, listed in ascending code order.
 * The third field of every entry equals the code length (15); the first
 * two are presumably {run, level} -- confirm against the DCTtab typedef.
 */
static const DCTtab DCTtab5[16] =
{
	{0, 40, 15},	/* 000000000010000 */
	{0, 39, 15},	/* 000000000010001 */
	{0, 38, 15},	/* 000000000010010 */
	{0, 37, 15},	/* 000000000010011 */
	{0, 36, 15},	/* 000000000010100 */
	{0, 35, 15},	/* 000000000010101 */
	{0, 34, 15},	/* 000000000010110 */
	{0, 33, 15},	/* 000000000010111 */
	{0, 32, 15},	/* 000000000011000 */
	{1, 14, 15},	/* 000000000011001 */
	{1, 13, 15},	/* 000000000011010 */
	{1, 12, 15},	/* 000000000011011 */
	{1, 11, 15},	/* 000000000011100 */
	{1, 10, 15},	/* 000000000011101 */
	{1,  9, 15},	/* 000000000011110 */
	{1,  8, 15}	/* 000000000011111 */
};
/* Table B-14/15, DCT coefficients table zero / one,
* codes 000000010000 ... 000000011111
*/
{ {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
{2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
{0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
{3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} },
/* Table B-14/15, DCT coefficients table zero / one,
* codes 0000000010000 ... 0000000011111
*/
{ {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
{2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
{0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
{25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} },
/* Table B-14/15, DCT coefficients table zero / one,
* codes 00000000010000 ... 00000000011111
*/
{ {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
{0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
{0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
{0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} },
/* Table B-14/15, DCT coefficients table zero / one,
* codes 000000000010000 ... 000000000011111
*/
{ {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
{0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
{0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
{1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} },
/* Table B-14/15, DCT coefficients table zero / one,
* codes 0000000000010000 ... 0000000000011111
*/
{ {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
{6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
{13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
{30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} }
/* DCTtab6: Table B-14/15, DCT coefficients table zero / one,
 * codes 0000000000010000 ... 0000000000011111
 *
 * Sixteen slots, one per 16-bit code, listed in ascending code order.
 * The third field of every entry equals the code length (16); the first
 * two are presumably {run, level} -- confirm against the DCTtab typedef.
 */
static const DCTtab DCTtab6[16] =
{
	{ 1, 18, 16},	/* 0000000000010000 */
	{ 1, 17, 16},	/* 0000000000010001 */
	{ 1, 16, 16},	/* 0000000000010010 */
	{ 1, 15, 16},	/* 0000000000010011 */
	{ 6,  3, 16},	/* 0000000000010100 */
	{16,  2, 16},	/* 0000000000010101 */
	{15,  2, 16},	/* 0000000000010110 */
	{14,  2, 16},	/* 0000000000010111 */
	{13,  2, 16},	/* 0000000000011000 */
	{12,  2, 16},	/* 0000000000011001 */
	{11,  2, 16},	/* 0000000000011010 */
	{31,  1, 16},	/* 0000000000011011 */
	{30,  1, 16},	/* 0000000000011100 */
	{29,  1, 16},	/* 0000000000011101 */
	{28,  1, 16},	/* 0000000000011110 */
	{27,  1, 16}	/* 0000000000011111 */
};
#endif//__VLC_H__

View File

@ -22,6 +22,7 @@
#include "Common.h"
#include "IPU.h"
#include "yuv2rgb.h"
#include "mpeg2lib/Mpeg.h"
// The IPU's colour space conversion conforms to ITU-R Recommendation BT.601 if anyone wants to make a
// faster or "more accurate" implementation, but this is the precise documented integer method used by