IPU: Various minor header file, table, and inline function tweaks and cleanups. Note that I unified several tables into structs and applied __aligned16 to them. I'm not just being silly: this seems to have a noticeable positive effect on framerates (~3-4% here).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3573 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-26 18:14:56 +00:00
parent 7074d31651
commit 4bb830827d
7 changed files with 419 additions and 366 deletions

View File

@ -24,6 +24,8 @@
#include "IPU.h" #include "IPU.h"
#include "yuv2rgb.h" #include "yuv2rgb.h"
#include "mpeg2lib/Mpeg.h"
#include "Vif.h" #include "Vif.h"
#include "Gif.h" #include "Gif.h"
#include "Vif_Dma.h" #include "Vif_Dma.h"
@ -50,7 +52,7 @@ u8* g_pIPU0Pointer = NULL;
void ReorderBitstream(); void ReorderBitstream();
// the BP doesn't advance and returns -1 if there is no data to be read // the BP doesn't advance and returns -1 if there is no data to be read
tIPU_BP g_BP; __aligned16 tIPU_BP g_BP;
void IPUWorker(); void IPUWorker();
@ -65,6 +67,7 @@ static u8 iq[64]; //intraquant matrix
u16 vqclut[16]; //clut conversion table u16 vqclut[16]; //clut conversion table
static u8 s_thresh[2]; //thresholds for color conversions static u8 s_thresh[2]; //thresholds for color conversions
int coded_block_pattern = 0; int coded_block_pattern = 0;
__aligned16 macroblock_8 mb8; __aligned16 macroblock_8 mb8;
__aligned16 macroblock_16 mb16; __aligned16 macroblock_16 mb16;
__aligned16 macroblock_rgb32 rgb32; __aligned16 macroblock_rgb32 rgb32;
@ -73,8 +76,7 @@ __aligned16 macroblock_rgb16 rgb16;
u8 indx4[16*16/2]; u8 indx4[16*16/2];
bool mpeg2_inited = false; //mpeg2_idct_init() must be called only once bool mpeg2_inited = false; //mpeg2_idct_init() must be called only once
u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'}; u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'};
decoder_t decoder; //static, only to place it in bss __aligned16 decoder_t decoder; //static, only to place it in bss
decoder_t tempdec;
extern "C" extern "C"
{ {
@ -96,10 +98,6 @@ void init_g_decoder()
decoder.intra_quantizer_matrix = (u8*)iq; decoder.intra_quantizer_matrix = (u8*)iq;
decoder.non_intra_quantizer_matrix = (u8*)niq; decoder.non_intra_quantizer_matrix = (u8*)niq;
decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
decoder.mb8 = &mb8;
decoder.mb16 = &mb16;
decoder.rgb32 = &rgb32;
decoder.rgb16 = &rgb16;
decoder.stride = 16; decoder.stride = 16;
} }
@ -428,8 +426,8 @@ static __forceinline BOOL ipuBDEC(u32 val, bool resume)
decoder.dcr = bdec.DCR; decoder.dcr = bdec.DCR;
decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN; decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
memzero(mb8); memzero_sse_a(mb8);
memzero(mb16); memzero_sse_a(mb16);
} }
return mpeg2_slice(); return mpeg2_slice();
@ -595,8 +593,8 @@ static BOOL __fastcall ipuCSC(u32 val)
if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE; if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
} }
ipu_csc(&mb8, &rgb32, 0); ipu_csc(mb8, rgb32, 0);
if (csc.OFM) ipu_dither(&rgb32, &rgb16, csc.DTE); if (csc.OFM) ipu_dither(rgb32, rgb16, csc.DTE);
if (csc.OFM) if (csc.OFM)
{ {
@ -637,10 +635,10 @@ static BOOL ipuPACK(u32 val)
if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE; if (!getBits64((u8*)&mb8 + 8 * ipu_cmd.pos[0], 1)) return FALSE;
} }
ipu_csc(&mb8, &rgb32, 0); ipu_csc(mb8, rgb32, 0);
ipu_dither(&rgb32, &rgb16, csc.DTE); ipu_dither(rgb32, rgb16, csc.DTE);
if (csc.OFM) ipu_vq(&rgb16, indx4); if (csc.OFM) ipu_vq(rgb16, indx4);
if (csc.OFM) if (csc.OFM)
{ {
@ -1117,10 +1115,10 @@ u8 __fastcall getBits8(u8 *address, u32 advance)
void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V, void Skl_YUV_To_RGB32_MMX(u8 *RGB, const int Dst_BpS, const u8 *Y, const u8 *U, const u8 *V,
const int Src_BpS, const int Width, const int Height); const int Src_BpS, const int Width, const int Height);
void __fastcall ipu_csc(macroblock_8 *mb8, macroblock_rgb32 *rgb32, int sgn) __forceinline void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
{ {
int i; int i;
u8* p = (u8*)rgb32; u8* p = (u8*)&rgb32;
yuv2rgb(); yuv2rgb();
@ -1151,30 +1149,30 @@ void __fastcall ipu_csc(macroblock_8 *mb8, macroblock_rgb32 *rgb32, int sgn)
} }
} }
void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb16, int dte) __forceinline void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte)
{ {
int i, j; int i, j;
for (i = 0; i < 16; ++i) for (i = 0; i < 16; ++i)
{ {
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
{ {
rgb16->c[i][j].r = rgb32->c[i][j].r >> 3; rgb16.c[i][j].r = rgb32.c[i][j].r >> 3;
rgb16->c[i][j].g = rgb32->c[i][j].g >> 3; rgb16.c[i][j].g = rgb32.c[i][j].g >> 3;
rgb16->c[i][j].b = rgb32->c[i][j].b >> 3; rgb16.c[i][j].b = rgb32.c[i][j].b >> 3;
rgb16->c[i][j].a = rgb32->c[i][j].a == 0x40; rgb16.c[i][j].a = rgb32.c[i][j].a == 0x40;
} }
} }
} }
void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4) __forceinline void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
{ {
Console.Error("IPU: VQ not implemented"); Console.Error("IPU: VQ not implemented");
} }
void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16) __forceinline void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16)
{ {
const u8 *s = (const u8*)mb8; const u8 *s = (const u8*)&mb8;
s16 *d = (s16*)mb16; s16 *d = (s16*)&mb16;
int i; int i;
for (i = 0; i < 256; i++) *d++ = *s++; //Y bias - 16 for (i = 0; i < 256; i++) *d++ = *s++; //Y bias - 16
for (i = 0; i < 64; i++) *d++ = *s++; //Cr bias - 128 for (i = 0; i < 64; i++) *d++ = *s++; //Cr bias - 128

View File

@ -16,7 +16,6 @@
#ifndef __IPU_H__ #ifndef __IPU_H__
#define __IPU_H__ #define __IPU_H__
#include "mpeg2lib/Mpeg.h"
#include "IPU_Fifo.h" #include "IPU_Fifo.h"
#ifdef _MSC_VER #ifdef _MSC_VER
@ -342,16 +341,11 @@ struct tIPU_cmd
}; };
extern tIPU_cmd ipu_cmd; extern tIPU_cmd ipu_cmd;
extern tIPU_BP g_BP;
extern int coded_block_pattern; extern int coded_block_pattern;
extern int g_nIPU0Data; // or 0x80000000 whenever transferring extern int g_nIPU0Data; // or 0x80000000 whenever transferring
extern u8* g_pIPU0Pointer; extern u8* g_pIPU0Pointer;
extern IPUStatus IPU1Status; extern IPUStatus IPU1Status;
extern tIPU_DMA g_nDMATransfer; extern tIPU_DMA g_nDMATransfer;
// The IPU can only do one task at once and never uses other buffers so these
// should be made available to functions in other modules to save registers.
extern __aligned16 macroblock_rgb32 rgb32;
extern __aligned16 macroblock_8 mb8;
extern int ipuInit(); extern int ipuInit();
extern void ipuReset(); extern void ipuReset();

View File

@ -15,8 +15,9 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "Common.h" #include "Common.h"
#include "IPU_Fifo.h"
#include "IPU.h" #include "IPU.h"
#include "mpeg2lib/Mpeg.h"
IPU_Fifo ipu_fifo; IPU_Fifo ipu_fifo;

View File

@ -243,9 +243,9 @@ int get_macroblock_address_increment()
u16 code = UBITS(16); u16 code = UBITS(16);
if (code >= 4096) if (code >= 4096)
mba = MBA_5 + (UBITS(5) - 2); mba = MBA.mba5 + (UBITS(5) - 2);
else if (code >= 768) else if (code >= 768)
mba = MBA_11 + (UBITS(11) - 24); mba = MBA.mba11 + (UBITS(11) - 24);
else switch (UBITS(11)) else switch (UBITS(11))
{ {
@ -277,16 +277,16 @@ static __forceinline int get_luma_dc_dct_diff()
if (code < 31) if (code < 31)
{ {
size = DClumtab0[code].size; size = DCtable.lum0[code].size;
DUMPBITS(DClumtab0[code].len); DUMPBITS(DCtable.lum0[code].len);
// 5 bits max // 5 bits max
} }
else else
{ {
code = UBITS(9) - 0x1f0; code = UBITS(9) - 0x1f0;
size = DClumtab1[code].size; size = DCtable.lum1[code].size;
DUMPBITS(DClumtab1[code].len); DUMPBITS(DCtable.lum1[code].len);
// 9 bits max // 9 bits max
} }
@ -313,14 +313,14 @@ static __forceinline int get_chroma_dc_dct_diff()
if (code<31) if (code<31)
{ {
size = DCchromtab0[code].size; size = DCtable.chrom0[code].size;
DUMPBITS(DCchromtab0[code].len); DUMPBITS(DCtable.chrom0[code].len);
} }
else else
{ {
code = UBITS(10) - 0x3e0; code = UBITS(10) - 0x3e0;
size = DCchromtab1[code].size; size = DCtable.chrom1[code].size;
DUMPBITS(DCchromtab1[code].len); DUMPBITS(DCtable.chrom1[code].len);
} }
if (size==0) if (size==0)
@ -371,49 +371,55 @@ static __forceinline bool get_intra_block()
if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1)) if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
{ {
tab = &DCTtabnext[(code >> 12) - 4]; tab = &DCT.next[(code >> 12) - 4];
} }
else if (code >= 1024) else if (code >= 1024)
{ {
if (decoder.intra_vlc_format && !decoder.mpeg1) if (decoder.intra_vlc_format && !decoder.mpeg1)
{ {
tab = &DCTtab0a[(code >> 8) - 4]; tab = &DCT.tab0a[(code >> 8) - 4];
} }
else else
{ {
tab = &DCTtab0[(code >> 8) - 4]; tab = &DCT.tab0[(code >> 8) - 4];
} }
} }
else if (code >= 512) else if (code >= 512)
{ {
if (decoder.intra_vlc_format && !decoder.mpeg1) if (decoder.intra_vlc_format && !decoder.mpeg1)
{ {
tab = &DCTtab1a[(code >> 6) - 8]; tab = &DCT.tab1a[(code >> 6) - 8];
} }
else else
{ {
tab = &DCTtab1[(code >> 6) - 8]; tab = &DCT.tab1[(code >> 6) - 8];
} }
} }
// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
// that should use a single unrolled DCT table instead of five separate tables used
// here. Multiple conditional statements are very slow, while modern CPU data caches
// have lots of room to spare.
else if (code >= 256) else if (code >= 256)
{ {
tab = &DCTtab2[(code >> 4) - 16]; tab = &DCT.tab2[(code >> 4) - 16];
} }
else if (code >= 128) else if (code >= 128)
{ {
tab = &DCTtab3[(code >> 3) - 16]; tab = &DCT.tab3[(code >> 3) - 16];
} }
else if (code >= 64) else if (code >= 64)
{ {
tab = &DCTtab4[(code >> 2) - 16]; tab = &DCT.tab4[(code >> 2) - 16];
} }
else if (code >= 32) else if (code >= 32)
{ {
tab = &DCTtab5[(code >> 1) - 16]; tab = &DCT.tab5[(code >> 1) - 16];
} }
else if (code >= 16) else if (code >= 16)
{ {
tab = &DCTtab6[code - 16]; tab = &DCT.tab6[code - 16];
} }
else else
{ {
@ -519,40 +525,46 @@ static __forceinline bool get_non_intra_block(int * last)
{ {
if (i==0) if (i==0)
{ {
tab = &DCTtabfirst[(code >> 12) - 4]; tab = &DCT.first[(code >> 12) - 4];
} }
else else
{ {
tab = &DCTtabnext[(code >> 12)- 4]; tab = &DCT.next[(code >> 12)- 4];
} }
} }
else if (code >= 1024) else if (code >= 1024)
{ {
tab = &DCTtab0[(code >> 8) - 4]; tab = &DCT.tab0[(code >> 8) - 4];
} }
else if (code >= 512) else if (code >= 512)
{ {
tab = &DCTtab1[(code >> 6) - 8]; tab = &DCT.tab1[(code >> 6) - 8];
} }
// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
// that should use a single unrolled DCT table instead of five separate tables used
// here. Multiple conditional statements are very slow, while modern CPU data caches
// have lots of room to spare.
else if (code >= 256) else if (code >= 256)
{ {
tab = &DCTtab2[(code >> 4) - 16]; tab = &DCT.tab2[(code >> 4) - 16];
} }
else if (code >= 128) else if (code >= 128)
{ {
tab = &DCTtab3[(code >> 3) - 16]; tab = &DCT.tab3[(code >> 3) - 16];
} }
else if (code >= 64) else if (code >= 64)
{ {
tab = &DCTtab4[(code >> 2) - 16]; tab = &DCT.tab4[(code >> 2) - 16];
} }
else if (code >= 32) else if (code >= 32)
{ {
tab = &DCTtab5[(code >> 1) - 16]; tab = &DCT.tab5[(code >> 1) - 16];
} }
else if (code >= 16) else if (code >= 16)
{ {
tab = &DCTtab6[code - 16]; tab = &DCT.tab6[code - 16];
} }
else else
{ {
@ -625,7 +637,7 @@ static __forceinline bool get_non_intra_block(int * last)
return true; return true;
} }
static bool __fastcall slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) static __forceinline bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
{ {
if (!skip || ipu_cmd.pos[3]) if (!skip || ipu_cmd.pos[3])
{ {
@ -655,13 +667,13 @@ static bool __fastcall slice_intra_DCT(const int cc, u8 * const dest, const int
return true; return true;
} }
static bool __fastcall slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) static __forceinline bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
{ {
int last; int last;
if (!skip) if (!skip)
{ {
memzero(decoder.DCTblock); memzero_sse_a(decoder.DCTblock);
} }
if (!get_non_intra_block(&last)) if (!get_non_intra_block(&last))
@ -735,8 +747,8 @@ bool mpeg2sliceIDEC()
} }
decoder.coded_block_pattern = 0x3F;//all 6 blocks decoder.coded_block_pattern = 0x3F;//all 6 blocks
memzero(*decoder.mb8); memzero_sse_a(mb8);
memzero(*decoder.rgb32); memzero_sse_a(rgb32);
case 1: case 1:
ipu_cmd.pos[1] = 1; ipu_cmd.pos[1] = 1;
@ -756,37 +768,37 @@ bool mpeg2sliceIDEC()
{ {
case 0: case 0:
case 1: case 1:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[2] == 1)) if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
{ {
ipu_cmd.pos[2] = 1; ipu_cmd.pos[2] = 1;
return false; return false;
} }
case 2: case 2:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[2] == 2)) if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
{ {
ipu_cmd.pos[2] = 2; ipu_cmd.pos[2] = 2;
return false; return false;
} }
case 3: case 3:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3)) if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
{ {
ipu_cmd.pos[2] = 3; ipu_cmd.pos[2] = 3;
return false; return false;
} }
case 4: case 4:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4)) if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
{ {
ipu_cmd.pos[2] = 4; ipu_cmd.pos[2] = 4;
return false; return false;
} }
case 5: case 5:
if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[2] == 5)) if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder.stride >> 1, ipu_cmd.pos[2] == 5))
{ {
ipu_cmd.pos[2] = 5; ipu_cmd.pos[2] = 5;
return false; return false;
} }
case 6: case 6:
if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[2] == 6)) if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder.stride >> 1, ipu_cmd.pos[2] == 6))
{ {
ipu_cmd.pos[2] = 6; ipu_cmd.pos[2] = 6;
return false; return false;
@ -794,19 +806,19 @@ bool mpeg2sliceIDEC()
} }
// Send The MacroBlock via DmaIpuFrom // Send The MacroBlock via DmaIpuFrom
ipu_csc(decoder.mb8, decoder.rgb32, decoder.sgn); ipu_csc(mb8, rgb32, decoder.sgn);
if (decoder.ofm == 0) if (decoder.ofm == 0)
{ {
g_nIPU0Data = 64; g_nIPU0Data = 64;
g_pIPU0Pointer = (u8*)decoder.rgb32; g_pIPU0Pointer = (u8*)&rgb32;
} }
else else
{ {
ipu_dither(decoder.rgb32, decoder.rgb16, decoder.dte); ipu_dither(rgb32, rgb16, decoder.dte);
g_nIPU0Data = 32; g_nIPU0Data = 32;
g_pIPU0Pointer = (u8*)decoder.rgb16; g_pIPU0Pointer = (u8*)&rgb16;
} }
case 2: case 2:
@ -841,12 +853,12 @@ bool mpeg2sliceIDEC()
code = UBITS(16); code = UBITS(16);
if (code >= 0x1000) if (code >= 0x1000)
{ {
mba = MBA_5 + (UBITS(5) - 2); mba = MBA.mba5 + (UBITS(5) - 2);
break; break;
} }
else if (code >= 0x0300) else if (code >= 0x0300)
{ {
mba = MBA_11 + (UBITS(11) - 24); mba = MBA.mba11 + (UBITS(11) - 24);
break; break;
} }
else switch (UBITS(11)) else switch (UBITS(11))
@ -942,8 +954,8 @@ bool mpeg2_slice()
ipuRegs->ctrl.ECD = 0; ipuRegs->ctrl.ECD = 0;
ipuRegs->top = 0; ipuRegs->top = 0;
memzero(*decoder.mb8); memzero_sse_a(mb8);
memzero(*decoder.mb16); memzero_sse_a(mb16);
case 1: case 1:
if (!bitstream_init()) if (!bitstream_init())
{ {
@ -972,37 +984,37 @@ bool mpeg2_slice()
case 0: case 0:
decoder.coded_block_pattern = 0x3F; decoder.coded_block_pattern = 0x3F;
case 1: case 1:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y, DCT_stride, ipu_cmd.pos[1] == 1)) if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
{ {
ipu_cmd.pos[1] = 1; ipu_cmd.pos[1] = 1;
return false; return false;
} }
case 2: case 2:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
{ {
ipu_cmd.pos[1] = 2; ipu_cmd.pos[1] = 2;
return false; return false;
} }
case 3: case 3:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
{ {
ipu_cmd.pos[1] = 3; ipu_cmd.pos[1] = 3;
return false; return false;
} }
case 4: case 4:
if (!slice_intra_DCT(0, (u8*)decoder.mb8->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
{ {
ipu_cmd.pos[1] = 4; ipu_cmd.pos[1] = 4;
return false; return false;
} }
case 5: case 5:
if (!slice_intra_DCT(1, (u8*)decoder.mb8->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5)) if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
{ {
ipu_cmd.pos[1] = 5; ipu_cmd.pos[1] = 5;
return false; return false;
} }
case 6: case 6:
if (!slice_intra_DCT(2, (u8*)decoder.mb8->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6)) if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
{ {
ipu_cmd.pos[1] = 6; ipu_cmd.pos[1] = 6;
return false; return false;
@ -1010,7 +1022,7 @@ bool mpeg2_slice()
break; break;
} }
ipu_copy(decoder.mb8, decoder.mb16); ipu_copy(mb8, mb16);
} }
else else
{ {
@ -1023,7 +1035,7 @@ bool mpeg2_slice()
case 1: case 1:
if (decoder.coded_block_pattern & 0x20) if (decoder.coded_block_pattern & 0x20)
{ {
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y, DCT_stride, ipu_cmd.pos[1] == 1)) if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
{ {
ipu_cmd.pos[1] = 1; ipu_cmd.pos[1] = 1;
return false; return false;
@ -1032,7 +1044,7 @@ bool mpeg2_slice()
case 2: case 2:
if (decoder.coded_block_pattern & 0x10) if (decoder.coded_block_pattern & 0x10)
{ {
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
{ {
ipu_cmd.pos[1] = 2; ipu_cmd.pos[1] = 2;
return false; return false;
@ -1041,7 +1053,7 @@ bool mpeg2_slice()
case 3: case 3:
if (decoder.coded_block_pattern & 0x08) if (decoder.coded_block_pattern & 0x08)
{ {
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
{ {
ipu_cmd.pos[1] = 3; ipu_cmd.pos[1] = 3;
return false; return false;
@ -1050,7 +1062,7 @@ bool mpeg2_slice()
case 4: case 4:
if (decoder.coded_block_pattern & 0x04) if (decoder.coded_block_pattern & 0x04)
{ {
if (!slice_non_intra_DCT((s16*)decoder.mb16->Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
{ {
ipu_cmd.pos[1] = 4; ipu_cmd.pos[1] = 4;
return false; return false;
@ -1059,7 +1071,7 @@ bool mpeg2_slice()
case 5: case 5:
if (decoder.coded_block_pattern & 0x2) if (decoder.coded_block_pattern & 0x2)
{ {
if (!slice_non_intra_DCT((s16*)decoder.mb16->Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5)) if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder.stride >> 1, ipu_cmd.pos[1] == 5))
{ {
ipu_cmd.pos[1] = 5; ipu_cmd.pos[1] = 5;
return false; return false;
@ -1068,7 +1080,7 @@ bool mpeg2_slice()
case 6: case 6:
if (decoder.coded_block_pattern & 0x1) if (decoder.coded_block_pattern & 0x1)
{ {
if (!slice_non_intra_DCT((s16*)decoder.mb16->Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6)) if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder.stride >> 1, ipu_cmd.pos[1] == 6))
{ {
ipu_cmd.pos[1] = 6; ipu_cmd.pos[1] = 6;
return false; return false;
@ -1098,7 +1110,7 @@ bool mpeg2_slice()
decoder.mbc = 1; decoder.mbc = 1;
g_nIPU0Data = 48; g_nIPU0Data = 48;
g_pIPU0Pointer = (u8*)decoder.mb16; g_pIPU0Pointer = (u8*)&mb16;
case 3: case 3:
while (g_nIPU0Data > 0) while (g_nIPU0Data > 0)

View File

@ -22,8 +22,50 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/ */
#ifndef __MPEG_H__ #pragma once
#define __MPEG_H__
#include <xmmintrin.h>
// Zeroes an entire object using aligned 128-bit SSE stores.
//
//   dest - object to clear.  sizeof(dest) must be a multiple of 16 bytes (enforced at
//          compile time by the C_ASSERT below).  dest must also sit on a 16-byte boundary,
//          since _mm_store_ps is an aligned store.
//
// The object is treated as an array of quadwords (16-byte units).  The (count % 8) leading
// quadwords are cleared by a fall-through switch; everything after that is cleared by a
// loop unrolled to eight stores per iteration.
template< typename T >
__noinline void memzero_sse_a( T& dest )
{
#define MZFqwc (sizeof(dest)/16)

	C_ASSERT( (sizeof(dest) & 0xf) == 0 );

	__m128 zeroreg = _mm_setzero_ps();

	float (*destxmm)[4] = (float(*)[4])&dest;

	// Entering the switch at 'case N' must clear quadwords [N-1 .. 0], so each case stores
	// to index (idx-1).  Storing to [idx] would leave quadword 0 untouched and write one
	// quadword past the leading remainder (out of bounds when the object is exactly N
	// quadwords in size).
#define StoreDestIdx(idx) case idx: _mm_store_ps(&destxmm[(idx)-1][0], zeroreg)

	switch( MZFqwc & 0x07 )
	{
		// Intentional fall-through from the entry case down to case 1.
		StoreDestIdx(0x07);
		StoreDestIdx(0x06);
		StoreDestIdx(0x05);
		StoreDestIdx(0x04);
		StoreDestIdx(0x03);
		StoreDestIdx(0x02);
		StoreDestIdx(0x01);
	}

	// Skip past the remainder handled above; what is left is a whole number of
	// 8-quadword groups.
	destxmm += (MZFqwc & 0x07);
	for( uint i=0; i<MZFqwc / 8; ++i, destxmm+=8 )
	{
		_mm_store_ps(&destxmm[0][0], zeroreg);
		_mm_store_ps(&destxmm[1][0], zeroreg);
		_mm_store_ps(&destxmm[2][0], zeroreg);
		_mm_store_ps(&destxmm[3][0], zeroreg);
		_mm_store_ps(&destxmm[4][0], zeroreg);
		_mm_store_ps(&destxmm[5][0], zeroreg);
		_mm_store_ps(&destxmm[6][0], zeroreg);
		_mm_store_ps(&destxmm[7][0], zeroreg);
	}

	// Both helper macros are local to this function; don't leak them to includers.
#undef StoreDestIdx
#undef MZFqwc
}
enum macroblock_modes enum macroblock_modes
{ {
@ -81,12 +123,12 @@ struct macroblock_rgb32{
} c[16][16]; } c[16][16];
}; };
struct rgb16{ struct rgb16_t{
unsigned short r:5, g:5, b:5, a:1; unsigned short r:5, g:5, b:5, a:1;
}; };
struct macroblock_rgb16{ struct macroblock_rgb16{
struct rgb16 c[16][16]; rgb16_t c[16][16];
}; };
struct decoder_t { struct decoder_t {
@ -100,11 +142,6 @@ struct decoder_t {
u32 bitstream_buf; /* current 32 bit working set */ u32 bitstream_buf; /* current 32 bit working set */
int bitstream_bits; /* used bits in working set */ int bitstream_bits; /* used bits in working set */
struct macroblock_8 *mb8;
struct macroblock_16 *mb16;
struct macroblock_rgb32 *rgb32;
struct macroblock_rgb16 *rgb16;
int stride; int stride;
/* predictor for DC coefficients in intra blocks */ /* predictor for DC coefficients in intra blocks */
@ -172,25 +209,24 @@ extern void (__fastcall *mpeg2_idct_add) (int last, s16 * block, s16* dest, int
#define IDEC 0 #define IDEC 0
#define BDEC 1 #define BDEC 1
bool mpeg2sliceIDEC(); extern bool mpeg2sliceIDEC();
bool mpeg2_slice(); extern bool mpeg2_slice();
int get_macroblock_address_increment(); extern int get_macroblock_address_increment();
int get_macroblock_modes(); extern int get_macroblock_modes();
extern int get_motion_delta(const int f_code); extern int get_motion_delta(const int f_code);
extern int get_dmv(); extern int get_dmv();
extern int non_linear_quantizer_scale[]; extern int non_linear_quantizer_scale[];
extern decoder_t g_decoder;
void __fastcall ipu_csc(macroblock_8 *mb8, macroblock_rgb32 *rgb32, int sgn); extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
void __fastcall ipu_dither(const macroblock_rgb32* rgb32, macroblock_rgb16 *rgb16, int dte); extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
void __fastcall ipu_vq(macroblock_rgb16 *rgb16, u8* indx4); extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
void __fastcall ipu_copy(const macroblock_8 *mb8, macroblock_16 *mb16); extern void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16);
int slice (u8 * buffer); extern int slice (u8 * buffer);
/* idct.c */ /* idct.c */
void mpeg2_idct_init (); extern void mpeg2_idct_init ();
#ifdef _MSC_VER #ifdef _MSC_VER
#define BigEndian(out, in) out = _byteswap_ulong(in) #define BigEndian(out, in) out = _byteswap_ulong(in)
@ -204,4 +240,13 @@ void mpeg2_idct_init ();
#define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap... #define BigEndian64(out, in) out = __builtin_bswap64(in) // or we could use the asm function bswap...
#endif #endif
#endif//__MPEG_H__ // The IPU can only do one task at once and never uses other buffers so all mpeg state variables
// are made available to mpeg/vlc modules as globals here:
extern __aligned16 tIPU_BP g_BP;
extern __aligned16 decoder_t decoder;
extern __aligned16 macroblock_8 mb8;
extern __aligned16 macroblock_16 mb16;
extern __aligned16 macroblock_rgb32 rgb32;
extern __aligned16 macroblock_rgb16 rgb16;

View File

@ -22,6 +22,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/ */
// WARNING! This file should only be included into Mpeg.cpp AND NOWHERE ELSE.
// All contents of this file are used only by Mpeg.cpp, and including it elsewhere will
// just result in the linker having to remove a whole lot of redundant/unused decoder
// tables and static functions. -- air
#ifndef __VLC_H__ #ifndef __VLC_H__
#define __VLC_H__ #define __VLC_H__
@ -29,9 +34,6 @@ static u8 data[2];
//static u8 word[4]; //static u8 word[4];
//static u8 dword[8]; //static u8 dword[8];
//static u8 qword[16]; //static u8 qword[16];
extern tIPU_BP g_BP;
extern decoder_t decoder;
extern void ReorderBitstream();
static __forceinline int GETWORD() static __forceinline int GETWORD()
{ {
@ -137,7 +139,7 @@ static const MBtab MB_I [] = {
#define MC MACROBLOCK_MOTION_FORWARD #define MC MACROBLOCK_MOTION_FORWARD
#define CODED MACROBLOCK_PATTERN #define CODED MACROBLOCK_PATTERN
static const MBtab MB_P [] = { static const __aligned16 MBtab MB_P [] = {
{INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5},
{MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3},
{CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2},
@ -152,7 +154,7 @@ static const MBtab MB_P [] = {
#define BWD MACROBLOCK_MOTION_BACKWARD #define BWD MACROBLOCK_MOTION_BACKWARD
#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD #define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
static const MBtab MB_B [] = { static const __aligned16 MBtab MB_B [] = {
{0, 0}, {INTRA|QUANT, 6}, {0, 0}, {INTRA|QUANT, 6},
{BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6},
{INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
@ -186,7 +188,7 @@ static const MVtab MV_4 [] = {
{ 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
}; };
static const MVtab MV_10 [] = { static const __aligned16 MVtab MV_10 [] = {
{ 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
{ 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
{11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
@ -201,7 +203,7 @@ static const DMVtab DMV_2 [] = {
}; };
static const CBPtab CBP_7 [] = { static const __aligned16 CBPtab CBP_7 [] = {
{0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
{0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
{0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
@ -232,7 +234,7 @@ static const CBPtab CBP_7 [] = {
{0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
}; };
static const CBPtab CBP_9 [] = { static const __aligned16 CBPtab CBP_9 [] = {
{0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
{0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
{0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
@ -251,6 +253,8 @@ static const CBPtab CBP_9 [] = {
{0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
}; };
#if 0 // following tables are unused by PCSX2
static const DCtab DC_lum_5 [] = { static const DCtab DC_lum_5 [] = {
{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
@ -272,7 +276,6 @@ static const DCtab DC_long [] = {
{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
}; };
static const DCTtab DCT_16 [] = { static const DCTtab DCT_16 [] = {
{129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
{129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
@ -421,16 +424,22 @@ static const DCTtab DCT_B15_8 [] = {
{ 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8},
{ 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8}
}; };
#endif
struct MBAtabSet
static const MBAtab MBA_5 [] = { {
MBAtab mba5[30];
MBAtab mba11[26*4];
};
static const __aligned16 MBAtabSet MBA = {
{ // mba5
{6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
{2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
}; },
static const MBAtab MBA_11 [] = { { // mba11
{32, 11}, {31, 11}, {30, 11}, {29, 11}, {32, 11}, {31, 11}, {30, 11}, {29, 11},
{28, 11}, {27, 11}, {26, 11}, {25, 11}, {28, 11}, {27, 11}, {26, 11}, {25, 11},
{24, 11}, {23, 11}, {22, 11}, {21, 11}, {24, 11}, {23, 11}, {22, 11}, {21, 11},
@ -457,11 +466,12 @@ static const MBAtab MBA_11 [] = {
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7},
{ 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}
}
}; };
// New // New
#if 0 // Not used by PCSX2
/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */ /* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */
static MBAtab MBAtab1[16] = static MBAtab MBAtab1[16] =
{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4}, { {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4},
@ -485,63 +495,78 @@ static MBAtab MBAtab2[104] =
{8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7},
{8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7} {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}
}; };
#endif
/* Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */ struct DCtabSet
static const DCtab DClumtab0[32] = {
DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110
DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111
DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110
DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111
};
static const __aligned16 DCtabSet DCtable =
{
// lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, { {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
{4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} },
};
/* Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */ /* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
static const DCtab DClumtab1[16] =
{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} },
};
/* Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */ /* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
static const DCtab DCchromtab0[32] =
{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, { {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} },
};
/* Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */ /* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
static const DCtab DCchromtab1[32] =
{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, { {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
{7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
{8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} },
}; };
/* Table B-14, DCT coefficients table zero, struct DCTtabSet
{
DCTtab first[12];
DCTtab next[12];
DCTtab tab0[60];
DCTtab tab0a[252];
DCTtab tab1[8];
DCTtab tab1a[8];
DCTtab tab2[16];
DCTtab tab3[16];
DCTtab tab4[16];
DCTtab tab5[16];
DCTtab tab6[16];
};
static const __aligned16 DCTtabSet DCT =
{
/* first[12]: Table B-14, DCT coefficients table zero,
* codes 0100 ... 1xxx (used for first (DC) coefficient) * codes 0100 ... 1xxx (used for first (DC) coefficient)
*/ */
static const DCTtab DCTtabfirst[12] = { {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{
{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
{0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} },
};
/* Table B-14, DCT coefficients table zero, /* next[12]: Table B-14, DCT coefficients table zero,
* codes 0100 ... 1xxx (used for all other coefficients) * codes 0100 ... 1xxx (used for all other coefficients)
*/ */
static const DCTtab DCTtabnext[12] = { {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{
{0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
{64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */ {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
{0,1,2}, {0,1,2}, {0,1,2}, {0,1,2} {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2} },
};
/* Table B-14, DCT coefficients table zero, /* tab0[60]: Table B-14, DCT coefficients table zero,
* codes 000001xx ... 00111xxx * codes 000001xx ... 00111xxx
*/ */
static const DCTtab DCTtab0[60] = { {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{
{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{2,2,7}, {2,2,7}, {9,1,7}, {9,1,7}, {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
{0,4,7}, {0,4,7}, {8,1,7}, {8,1,7}, {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
{7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
@ -555,15 +580,12 @@ static const DCTtab DCTtab0[60] =
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
{3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} },
};
/* Table B-15, DCT coefficients table one, /* tab0a[252]: Table B-15, DCT coefficients table one,
* codes 000001xx ... 11111111 * codes 000001xx ... 11111111
*/ */
static const DCTtab DCTtab0a[252] = { {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{
{65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
{7,1,7}, {7,1,7}, {8,1,7}, {8,1,7}, {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
{6,1,7}, {6,1,7}, {2,2,7}, {2,2,7}, {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
{0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
@ -625,80 +647,60 @@ static const DCTtab DCTtab0a[252] =
{9,1,7}, {9,1,7}, {1,3,7}, {1,3,7}, {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
{10,1,7}, {10,1,7}, {0,8,7}, {0,8,7}, {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
{0,9,7}, {0,9,7}, {0,12,8}, {0,13,8}, {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
{2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} },
};
/* Table B-14, DCT coefficients table zero, /* Table B-14, DCT coefficients table zero,
* codes 0000001000 ... 0000001111 * codes 0000001000 ... 0000001111
*/ */
static const DCTtab DCTtab1[8] = { {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
{ {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10} },
{16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
{1,4,10}, {15,1,10}, {14,1,10}, {4,2,10}
};
/* Table B-15, DCT coefficients table one, /* Table B-15, DCT coefficients table one,
* codes 000000100x ... 000000111x * codes 000000100x ... 000000111x
*/ */
static const DCTtab DCTtab1a[8] = { {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
{ {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9} },
{5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
{2,4,10}, {16,1,10}, {15,1,9}, {15,1,9}
};
/* Table B-14/15, DCT coefficients table zero / one, /* Table B-14/15, DCT coefficients table zero / one,
* codes 000000010000 ... 000000011111 * codes 000000010000 ... 000000011111
*/ */
static const DCTtab DCTtab2[16] = { {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
{
{0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
{2,4,12}, {7,2,12}, {21,1,12}, {20,1,12}, {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
{0,9,12}, {19,1,12}, {18,1,12}, {1,5,12}, {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
{3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} },
};
/* Table B-14/15, DCT coefficients table zero / one, /* Table B-14/15, DCT coefficients table zero / one,
* codes 0000000010000 ... 0000000011111 * codes 0000000010000 ... 0000000011111
*/ */
static const DCTtab DCTtab3[16] = { {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
{
{10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
{2,5,13}, {1,7,13}, {1,6,13}, {0,15,13}, {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
{0,14,13}, {0,13,13}, {0,12,13}, {26,1,13}, {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
{25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} },
};
/* Table B-14/15, DCT coefficients table zero / one, /* Table B-14/15, DCT coefficients table zero / one,
* codes 00000000010000 ... 00000000011111 * codes 00000000010000 ... 00000000011111
*/ */
static const DCTtab DCTtab4[16] = { {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
{
{0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
{0,27,14}, {0,26,14}, {0,25,14}, {0,24,14}, {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
{0,23,14}, {0,22,14}, {0,21,14}, {0,20,14}, {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
{0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} },
};
/* Table B-14/15, DCT coefficients table zero / one, /* Table B-14/15, DCT coefficients table zero / one,
* codes 000000000010000 ... 000000000011111 * codes 000000000010000 ... 000000000011111
*/ */
static const DCTtab DCTtab5[16] = { {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
{
{0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
{0,36,15}, {0,35,15}, {0,34,15}, {0,33,15}, {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
{0,32,15}, {1,14,15}, {1,13,15}, {1,12,15}, {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
{1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} },
};
/* Table B-14/15, DCT coefficients table zero / one, /* Table B-14/15, DCT coefficients table zero / one,
* codes 0000000000010000 ... 0000000000011111 * codes 0000000000010000 ... 0000000000011111
*/ */
static const DCTtab DCTtab6[16] = { {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
{
{1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
{6,3,16}, {16,2,16}, {15,2,16}, {14,2,16}, {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
{13,2,16}, {12,2,16}, {11,2,16}, {31,1,16}, {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
{30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} }
}; };
#endif//__VLC_H__ #endif//__VLC_H__

View File

@ -22,6 +22,7 @@
#include "Common.h" #include "Common.h"
#include "IPU.h" #include "IPU.h"
#include "yuv2rgb.h" #include "yuv2rgb.h"
#include "mpeg2lib/Mpeg.h"
// The IPU's colour space conversion conforms to ITU-R Recommendation BT.601 if anyone wants to make a // The IPU's colour space conversion conforms to ITU-R Recommendation BT.601 if anyone wants to make a
// faster or "more accurate" implementation, but this is the precise documented integer method used by // faster or "more accurate" implementation, but this is the precise documented integer method used by