diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 7d9ccc121a..140e036c50 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -687,8 +687,6 @@ set(pcsx2IPUSources set(pcsx2IPUSourcesUnshared IPU/IPU_MultiISA.cpp IPU/IPUdither.cpp - IPU/mpeg2lib/Idct.cpp - IPU/mpeg2lib/Mpeg.cpp IPU/yuv2rgb.cpp ) @@ -698,8 +696,7 @@ set(pcsx2IPUHeaders IPU/IPU_Fifo.h IPU/IPU_MultiISA.h IPU/IPUdma.h - IPU/mpeg2lib/Mpeg.h - IPU/mpeg2lib/Vlc.h + IPU/mpeg2_vlc.h IPU/yuv2rgb.h ) diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 3aa070fcd7..b0e366c74a 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -19,7 +19,6 @@ #include "IPU.h" #include "IPU_MultiISA.h" #include "IPUdma.h" -#include "mpeg2lib/Mpeg.h" #include #include "Config.h" @@ -46,10 +45,47 @@ int coded_block_pattern = 0; alignas(16) u8 g_ipu_indx4[16*16/2]; +alignas(16) const int non_linear_quantizer_scale[32] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + uint eecount_on_last_vdec = 0; bool FMVstarted = false; bool EnableFMV = false; +// Also defined in IPU_MultiISA.cpp, but IPU.cpp is not unshared. +// whenever reading fractions of bytes. 
The low bits always come from the next byte +// while the high bits come from the current byte +__ri static u8 getBits32(u8* address, bool advance) +{ + if (!g_BP.FillBuffer(32)) + return 0; + + const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP / 8]; + + if (uint shift = (g_BP.BP & 7)) + { + u32 mask = (0xff >> shift); + mask = mask | (mask << 8) | (mask << 16) | (mask << 24); + + *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); + } + else + { + // Bit position-aligned -- no masking/shifting necessary + *(u32*)address = *(u32*)readpos; + } + + if (advance) + g_BP.Advance(32); + + return 1; +} + void tIPU_cmd::clear() { memzero_sse_a(*this); @@ -89,7 +125,6 @@ void ReportIPU() Console.WriteLn("thresh = 0x%x.", g_ipu_thresh); Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern); Console.WriteLn("g_decoder = 0x%x.", &decoder); - Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan); Console.WriteLn(ipu_cmd.desc()); Console.Newline(); } @@ -346,7 +381,6 @@ __fi bool ipuWrite64(u32 mem, u64 value) return true; } - ////////////////////////////////////////////////////// // IPU Commands (exec on worker thread only) @@ -416,130 +450,6 @@ static void ipuSETTH(u32 val) IPU_LOG("SETTH (Set threshold value)command %x.", val&0x1ff01ff); } -// -------------------------------------------------------------------------------------- -// Buffer reader -// -------------------------------------------------------------------------------------- - -__ri u32 UBITS(uint bits) -{ - uint readpos8 = g_BP.BP/8; - - uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 )); - uint bp7 = (g_BP.BP & 7); - result <<= bp7; - result >>= (32 - bits); - - return result; -} - -__ri s32 SBITS(uint bits) -{ - // Read an unaligned 32 bit value and then shift the bits up and then back down. 
- - uint readpos8 = g_BP.BP/8; - - int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 )); - uint bp7 = (g_BP.BP & 7); - result <<= bp7; - result >>= (32 - bits); - - return result; -} - -// whenever reading fractions of bytes. The low bits always come from the next byte -// while the high bits come from the current byte -u8 getBits64(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(64)) return 0; - - const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; - - if (uint shift = (g_BP.BP & 7)) - { - u64 mask = (0xff >> shift); - mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); - - *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift); - } - else - { - *(u64*)address = *(u64*)readpos; - } - - if (advance) g_BP.Advance(64); - - return 1; -} - -// whenever reading fractions of bytes. The low bits always come from the next byte -// while the high bits come from the current byte -__fi u8 getBits32(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(32)) return 0; - - const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8]; - - if(uint shift = (g_BP.BP & 7)) - { - u32 mask = (0xff >> shift); - mask = mask | (mask << 8) | (mask << 16) | (mask << 24); - - *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); - } - else - { - // Bit position-aligned -- no masking/shifting necessary - *(u32*)address = *(u32*)readpos; - } - - if (advance) g_BP.Advance(32); - - return 1; -} - -__fi u8 getBits16(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(16)) return 0; - - const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; - - if (uint shift = (g_BP.BP & 7)) - { - uint mask = (0xff >> shift); - mask = mask | (mask << 8); - *(u16*)address = ((~mask & *(u16*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u16*)readpos) << shift); - } - else - { - *(u16*)address = *(u16*)readpos; - } - - if 
(advance) g_BP.Advance(16); - - return 1; -} - -u8 getBits8(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(8)) return 0; - - const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; - - if (uint shift = (g_BP.BP & 7)) - { - uint mask = (0xff >> shift); - *(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift); - } - else - { - *(u8*)address = *(u8*)readpos; - } - - if (advance) g_BP.Advance(8); - - return 1; -} - // -------------------------------------------------------------------------------------- // IPU Worker / Dispatcher // -------------------------------------------------------------------------------------- diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index fcfd2ebe78..4dc8cc03ea 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -293,7 +293,6 @@ extern bool EnableFMV; alignas(16) extern tIPU_cmd ipu_cmd; extern uint eecount_on_last_vdec; -extern int coded_block_pattern; extern bool CommandExecuteQueued; extern void ipuReset(); @@ -307,8 +306,3 @@ extern void IPUCMD_WRITE(u32 val); extern void ipuSoftReset(); extern void IPUProcessInterrupt(); -extern u8 getBits64(u8 *address, bool advance); -extern u8 getBits32(u8 *address, bool advance); -extern u8 getBits16(u8 *address, bool advance); -extern u8 getBits8(u8 *address, bool advance); - diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp index 9765b97b96..3df82a3a27 100644 --- a/pcsx2/IPU/IPU_Fifo.cpp +++ b/pcsx2/IPU/IPU_Fifo.cpp @@ -15,9 +15,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" +#include "IPU/IPU.h" #include "IPU/IPUdma.h" -#include "mpeg2lib/Mpeg.h" +#include "IPU/IPU_MultiISA.h" alignas(16) IPU_Fifo ipu_fifo; diff --git a/pcsx2/IPU/IPU_MultiISA.cpp b/pcsx2/IPU/IPU_MultiISA.cpp index 7c94d975e2..1c8d323649 100644 --- a/pcsx2/IPU/IPU_MultiISA.cpp +++ b/pcsx2/IPU/IPU_MultiISA.cpp @@ -11,20 +11,1575 @@ * * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . 
+ * + * Some of the functions in this file are based on the mpeg2dec library, + * + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * Modified by Florin for PCSX2 emu + * + * under the GPL license. However, they have been heavily rewritten for PCSX2 usage. + * The original author's copyright statement is included above for completeness sake. */ -#include "IPU_MultiISA.h" +#include "IPU/IPU.h" +#include "IPU/IPUdma.h" +#include "IPU/yuv2rgb.h" +#include "IPU/IPU_MultiISA.h" +#include "common/MemsetFast.inl" -#include "IPU.h" -#include "IPUdma.h" -#include "yuv2rgb.h" +// the IPU is fixed to 16 byte strides (128-bit / QWC resolution): +static const uint decoder_stride = 16; + +#if MULTI_ISA_COMPILE_ONCE + +static constexpr std::array make_clip_lut() +{ + std::array lut = {}; + for (int i = -384; i < 640; i++) + lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); + return lut; +} + +static constexpr mpeg2_scan_pack make_scan_pack() +{ + constexpr u8 mpeg2_scan_norm[64] = { + /* Zig-Zag scan pattern */ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 + }; + + constexpr u8 mpeg2_scan_alt[64] = { + /* Alternate scan pattern */ + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 + }; + + mpeg2_scan_pack pack = {}; + + for (int i = 0; i < 64; i++) { + int j = mpeg2_scan_norm[i]; + pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + j = mpeg2_scan_alt[i]; + pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } + + return pack; +} + +alignas(16) const std::array g_idct_clip_lut = make_clip_lut(); +alignas(16) const mpeg2_scan_pack 
mpeg2_scan = make_scan_pack(); + +#endif MULTI_ISA_UNSHARED_START +static void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn); +static void ipu_vq(macroblock_rgb16& rgb16, u8* indx4); + +// -------------------------------------------------------------------------------------- +// Buffer reader +// -------------------------------------------------------------------------------------- + +__ri static u32 UBITS(uint bits) // Peeks at the next `bits` bits of the stream as an unsigned value; g_BP.BP is not advanced here. +{ + uint readpos8 = g_BP.BP/8; // byte offset of the current bit position inside internal_qwc + + uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 )); // 32-bit load at an arbitrary byte offset, passed through BigEndian() + uint bp7 = (g_BP.BP & 7); // bit offset inside that first byte + result <<= bp7; // discard the bits that precede BP; zeros enter at the bottom + result >>= (32 - bits); // keep the top `bits` bits; a request wider than 32 - bp7 gets zero-filled low bits, and bits == 0 would shift by 32 + + return result; +} + +__ri static s32 SBITS(uint bits) // Signed counterpart of UBITS: same peek without advancing, but the shifts are done on a signed int. +{ + // Read an unaligned 32 bit value and then shift the bits up and then back down. + + uint readpos8 = g_BP.BP/8; + + int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 )); + uint bp7 = (g_BP.BP & 7); + result <<= bp7; + result >>= (32 - bits); // NOTE(review): sign extension here relies on >> of a signed int being an arithmetic shift - confirm for all supported compilers + + return result; +} + +__fi static int GETWORD() // Forwards the result of g_BP.FillBuffer(16); callers in this file treat 0 as "not enough input yet" and suspend. NOTE(review): presumably FillBuffer guarantees at least 16 readable bits on success - confirm. +{ + return g_BP.FillBuffer(16); +} + +// Removes bits from the bitstream. This is done independently of UBITS/SBITS because a +// lot of mpeg streams have to read ahead and rewind bits and re-read them at different +// bit depths or sign'age. +__fi static void DUMPBITS(uint num) +{ + g_BP.Advance(num); + //pxAssume(g_BP.FP != 0); +} + +__fi static u32 GETBITS(uint num) // Reads `num` bits exactly like UBITS and then consumes them with g_BP.Advance(num). +{ + uint retVal = UBITS(num); + g_BP.Advance(num); + + return retVal; +} + +// whenever reading fractions of bytes.
The low bits always come from the next byte +// while the high bits come from the current byte +__ri static u8 getBits64(u8 *address, bool advance) // Copies the next 64 stream bits to *address, bytes kept in stream order. Returns 0 when g_BP.FillBuffer(64) fails, otherwise 1. BP moves forward by 64 only when `advance` is true. +{ + if (!g_BP.FillBuffer(64)) return 0; + + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; // byte that contains the current bit position + + if (uint shift = (g_BP.BP & 7)) + { + u64 mask = (0xff >> shift); // selects the low (8 - shift) bits of one byte + mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); // same byte mask replicated into all eight byte lanes + + *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift); // per byte lane: low bits of the current byte move up, the top `shift` bits of the following byte fill in below; loads 8 bytes at readpos and 8 more at readpos + 1 + } + else + { + *(u64*)address = *(u64*)readpos; + } + + if (advance) g_BP.Advance(64); + + return 1; +} + +// whenever reading fractions of bytes. The low bits always come from the next byte +// while the high bits come from the current byte +__ri static u8 getBits32(u8 *address, bool advance) // 32-bit form of the same copy: writes 4 bytes to *address, returns 0 when g_BP.FillBuffer(32) fails and 1 otherwise, advances BP by 32 only when `advance` is true. +{ + if (!g_BP.FillBuffer(32)) return 0; + + const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8]; + + if(uint shift = (g_BP.BP & 7)) + { + u32 mask = (0xff >> shift); // low (8 - shift) bits of one byte + mask = mask | (mask << 8) | (mask << 16) | (mask << 24); // replicated into all four byte lanes + + *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); // masking keeps each byte lane from leaking into its neighbour during the shifts + } + else + { + // Bit position-aligned -- no masking/shifting necessary + *(u32*)address = *(u32*)readpos; + } + + if (advance) g_BP.Advance(32); + + return 1; +} + +__ri static u8 getBits8(u8 *address, bool advance) // Single-byte form: writes one byte to *address, returns 0 when g_BP.FillBuffer(8) fails and 1 otherwise, advances BP by 8 only when `advance` is true. +{ + if (!g_BP.FillBuffer(8)) return 0; + + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; + + if (uint shift = (g_BP.BP & 7)) + { + uint mask = (0xff >> shift); + *(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift); // combines the tail of *readpos with the head of readpos[1] + } + else + { + *(u8*)address = *(u8*)readpos; + } + + if (advance) g_BP.Advance(8); + + return 1; +} + + +#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ +#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ +#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ +#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ +#define W6 1108 /*
2048*sqrt (2)*cos (6*pi/16) */ +#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ + +/* + * In legal streams, the IDCT output should be between -384 and +384. + * In corrupted streams, it is possible to force the IDCT output to go + * to +-3826 - this is the worst case for a column IDCT where the + * column inputs are 16-bit values. + */ + +__fi static void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1) +{ + int tmp = w0 * (d0 + d1); + t0 = tmp + (w1 - w0) * d1; + t1 = tmp - (w1 + w0) * d0; +} + +__ri static void IDCT_Block(s16* block) +{ + for (int i = 0; i < 8; i++) + { + s16* const rblock = block + 8 * i; + if (!(rblock[1] | ((s32*)rblock)[1] | ((s32*)rblock)[2] | + ((s32*)rblock)[3])) + { + u32 tmp = (u16)(rblock[0] << 3); + tmp |= tmp << 16; + ((s32*)rblock)[0] = tmp; + ((s32*)rblock)[1] = tmp; + ((s32*)rblock)[2] = tmp; + ((s32*)rblock)[3] = tmp; + continue; + } + + int a0, a1, a2, a3; + { + const int d0 = (rblock[0] << 11) + 128; + const int d1 = rblock[1]; + const int d2 = rblock[2] << 11; + const int d3 = rblock[3]; + int t0 = d0 + d2; + int t1 = d0 - d2; + int t2, t3; + BUTTERFLY(t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + } + + int b0, b1, b2, b3; + { + const int d0 = rblock[4]; + const int d1 = rblock[5]; + const int d2 = rblock[6]; + const int d3 = rblock[7]; + int t0, t1, t2, t3; + BUTTERFLY(t0, t1, W7, W1, d3, d0); + BUTTERFLY(t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) * 181) >> 8; + b2 = ((t0 - t1) * 181) >> 8; + } + + rblock[0] = (a0 + b0) >> 8; + rblock[1] = (a1 + b1) >> 8; + rblock[2] = (a2 + b2) >> 8; + rblock[3] = (a3 + b3) >> 8; + rblock[4] = (a3 - b3) >> 8; + rblock[5] = (a2 - b2) >> 8; + rblock[6] = (a1 - b1) >> 8; + rblock[7] = (a0 - b0) >> 8; + } + + for (int i = 0; i < 8; i++) + { + s16* const cblock = block + i; + + int a0, a1, a2, a3; + { + const int d0 = (cblock[8 * 0] << 11) + 65536; + const int d1 = cblock[8 * 1]; + 
const int d2 = cblock[8 * 2] << 11; + const int d3 = cblock[8 * 3]; + const int t0 = d0 + d2; + const int t1 = d0 - d2; + int t2; + int t3; + BUTTERFLY(t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + } + + int b0, b1, b2, b3; + { + const int d0 = cblock[8 * 4]; + const int d1 = cblock[8 * 5]; + const int d2 = cblock[8 * 6]; + const int d3 = cblock[8 * 7]; + int t0, t1, t2, t3; + BUTTERFLY(t0, t1, W7, W1, d3, d0); + BUTTERFLY(t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 = (t0 - t2) >> 8; + t1 = (t1 - t3) >> 8; + b1 = (t0 + t1) * 181; + b2 = (t0 - t1) * 181; + } + + cblock[8 * 0] = (a0 + b0) >> 17; + cblock[8 * 1] = (a1 + b1) >> 17; + cblock[8 * 2] = (a2 + b2) >> 17; + cblock[8 * 3] = (a3 + b3) >> 17; + cblock[8 * 4] = (a3 - b3) >> 17; + cblock[8 * 5] = (a2 - b2) >> 17; + cblock[8 * 6] = (a1 - b1) >> 17; + cblock[8 * 7] = (a0 - b0) >> 17; + } +} + +__ri static void IDCT_Copy(s16* block, u8* dest, const int stride) +{ + IDCT_Block(block); + + for (int i = 0; i < 8; i++) + { + dest[0] = (g_idct_clip_lut.data() + 384)[block[0]]; + dest[1] = (g_idct_clip_lut.data() + 384)[block[1]]; + dest[2] = (g_idct_clip_lut.data() + 384)[block[2]]; + dest[3] = (g_idct_clip_lut.data() + 384)[block[3]]; + dest[4] = (g_idct_clip_lut.data() + 384)[block[4]]; + dest[5] = (g_idct_clip_lut.data() + 384)[block[5]]; + dest[6] = (g_idct_clip_lut.data() + 384)[block[6]]; + dest[7] = (g_idct_clip_lut.data() + 384)[block[7]]; + + std::memset(block, 0, 16); + + dest += stride; + block += 8; + } +} + + +// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]). +__ri static void IDCT_Add(const int last, s16* block, s16* dest, const int stride) +{ + // on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0). 
+ + if (last != 129 || (block[0] & 7) == 4) + { + IDCT_Block(block); + + __m128 zero = _mm_setzero_ps(); + for (int i = 0; i < 8; i++) + { + _mm_store_ps((float*)dest, _mm_load_ps((float*)block)); + _mm_store_ps((float*)block, zero); + + dest += stride; + block += 8; + } + } + else + { + s16 DC = ((int)block[0] + 4) >> 3; + s16 dcf[2] = {DC, DC}; + block[0] = block[63] = 0; + + __m128 dc128 = _mm_set_ps1(*(float*)dcf); + + for (int i = 0; i < 8; ++i) + _mm_store_ps((float*)(dest + (stride * i)), dc128); + } +} + +/* Bitstream and buffer needs to be reallocated in order for successful + reading of the old data. Here the old data stored in the 2nd slot + of the internal buffer is copied to 1st slot, and the new data read + into 1st slot is copied to the 2nd slot. Which will later be copied + back to the 1st slot when 128bits have been read. +*/ +static const DCTtab * tab; +static int mbaCount = 0; + +__ri static int BitstreamInit () +{ + return g_BP.FillBuffer(32); +} + +static int GetMacroblockModes() +{ + int macroblock_modes; + const MBtab * tab; + + switch (decoder.coding_type) + { + case I_TYPE: + macroblock_modes = UBITS(2); + + if (macroblock_modes == 0) return 0; // error + + tab = MB_I + (macroblock_modes >> 1); + DUMPBITS(tab->len); + macroblock_modes = tab->modes; + + if ((!(decoder.frame_pred_frame_dct)) && + (decoder.picture_structure == FRAME_PICTURE)) + { + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; + } + return macroblock_modes; + + case P_TYPE: + macroblock_modes = UBITS(6); + + if (macroblock_modes == 0) return 0; // error + + tab = MB_P + (macroblock_modes >> 1); + DUMPBITS(tab->len); + macroblock_modes = tab->modes; + + if (decoder.picture_structure != FRAME_PICTURE) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + { + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + } + + return macroblock_modes; + } + else if (decoder.frame_pred_frame_dct) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= 
MC_FRAME; + + return macroblock_modes; + } + else + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + { + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + } + + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; + } + + return macroblock_modes; + } + + case B_TYPE: + macroblock_modes = UBITS(6); + + if (macroblock_modes == 0) return 0; // error + + tab = MB_B + macroblock_modes; + DUMPBITS(tab->len); + macroblock_modes = tab->modes; + + if (decoder.picture_structure != FRAME_PICTURE) + { + if (!(macroblock_modes & MACROBLOCK_INTRA)) + { + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + } + return (macroblock_modes | (tab->len << 16)); + } + else if (decoder.frame_pred_frame_dct) + { + /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return (macroblock_modes | (tab->len << 16)); + } + else + { + if (macroblock_modes & MACROBLOCK_INTRA) goto intra; + + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { +intra: + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; + } + return (macroblock_modes | (tab->len << 16)); + } + + case D_TYPE: + macroblock_modes = GETBITS(1); + //I suspect (as this is actually a 2 bit command) that this should be getbits(2) + //additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction) + DevCon.Warning(" Rare MPEG command! 
"); + if (macroblock_modes == 0) return 0; // error + return (MACROBLOCK_INTRA | (1 << 16)); + + default: + return 0; + } +} + +__ri static int get_macroblock_address_increment() +{ + const MBAtab *mba; + + u16 code = UBITS(16); + + if (code >= 4096) + mba = MBA.mba5 + (UBITS(5) - 2); + else if (code >= 768) + mba = MBA.mba11 + (UBITS(11) - 24); + else switch (UBITS(11)) + { + case 8: /* macroblock_escape */ + DUMPBITS(11); + return 0xb0023; + + case 15: /* macroblock_stuffing (MPEG1 only) */ + if (decoder.mpeg1) + { + DUMPBITS(11); + return 0xb0022; + } + [[fallthrough]]; + + default: + return 0;//error + } + + DUMPBITS(mba->len); + + return ((mba->mba + 1) | (mba->len << 16)); +} + +__fi static int get_luma_dc_dct_diff() +{ + int size; + int dc_diff; + u16 code = UBITS(5); + + if (code < 31) + { + size = DCtable.lum0[code].size; + DUMPBITS(DCtable.lum0[code].len); + + // 5 bits max + } + else + { + code = UBITS(9) - 0x1f0; + size = DCtable.lum1[code].size; + DUMPBITS(DCtable.lum1[code].len); + + // 9 bits max + } + + if (size==0) + dc_diff = 0; + else + { + dc_diff = GETBITS(size); + + // 6 for tab0 and 11 for tab1 + if ((dc_diff & (1<<(size-1)))==0) + dc_diff-= (1< 4095) + val = (val >> 31) ^ 2047; +} + +__ri static bool get_intra_block() +{ + const u8 * scan = decoder.scantype ? 
mpeg2_scan.alt : mpeg2_scan.norm; + const u8 (&quant_matrix)[64] = decoder.iq; + int quantizer_scale = decoder.quantizer_scale; + s16 * dest = decoder.DCTblock; + u16 code; + + /* decode AC coefficients */ + for (int i=1 + ipu_cmd.pos[4]; ; i++) + { + switch (ipu_cmd.pos[5]) + { + case 0: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i - 1; + return false; + } + + code = UBITS(16); + + if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1)) + { + tab = &DCT.next[(code >> 12) - 4]; + } + else if (code >= 1024) + { + if (decoder.intra_vlc_format && !decoder.mpeg1) + { + tab = &DCT.tab0a[(code >> 8) - 4]; + } + else + { + tab = &DCT.tab0[(code >> 8) - 4]; + } + } + else if (code >= 512) + { + if (decoder.intra_vlc_format && !decoder.mpeg1) + { + tab = &DCT.tab1a[(code >> 6) - 8]; + } + else + { + tab = &DCT.tab1[(code >> 6) - 8]; + } + } + + // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup + // that should use a single unrolled DCT table instead of five separate tables used + // here. Multiple conditional statements are very slow, while modern CPU data caches + // have lots of room to spare. + + else if (code >= 256) + { + tab = &DCT.tab2[(code >> 4) - 16]; + } + else if (code >= 128) + { + tab = &DCT.tab3[(code >> 3) - 16]; + } + else if (code >= 64) + { + tab = &DCT.tab4[(code >> 2) - 16]; + } + else if (code >= 32) + { + tab = &DCT.tab5[(code >> 1) - 16]; + } + else if (code >= 16) + { + tab = &DCT.tab6[code - 16]; + } + else + { + ipu_cmd.pos[4] = 0; + return true; + } + + DUMPBITS(tab->len); + + if (tab->run==64) /* end_of_block */ + { + ipu_cmd.pos[4] = 0; + return true; + } + + i += (tab->run == 65) ? 
GETBITS(6) : tab->run; + if (i >= 64) + { + ipu_cmd.pos[4] = 0; + return true; + } + [[fallthrough]]; + + case 1: + { + if (!GETWORD()) + { + ipu_cmd.pos[4] = i - 1; + ipu_cmd.pos[5] = 1; + return false; + } + + uint j = scan[i]; + int val; + + if (tab->run==65) /* escape */ + { + if(!decoder.mpeg1) + { + val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); + + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = (val * quantizer_scale * quant_matrix[i]) >> 4; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + if(decoder.mpeg1) + { + /* oddification */ + val = (val - 1) | 1; + } + + /* if (bitstream_get (1)) val = -val; */ + int bit1 = SBITS(1); + val = (val ^ bit1) - bit1; + DUMPBITS(1); + } + + SATURATE(val); + dest[j] = val; + ipu_cmd.pos[5] = 0; + } + } + } + + ipu_cmd.pos[4] = 0; + return true; +} + +__ri static bool get_non_intra_block(int * last) +{ + int i; + int j; + int val; + const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm; + const u8 (&quant_matrix)[64] = decoder.niq; + int quantizer_scale = decoder.quantizer_scale; + s16 * dest = decoder.DCTblock; + u16 code; + + /* decode AC coefficients */ + for (i= ipu_cmd.pos[4] ; ; i++) + { + switch (ipu_cmd.pos[5]) + { + case 0: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i; + return false; + } + + code = UBITS(16); + + if (code >= 16384) + { + if (i==0) + { + tab = &DCT.first[(code >> 12) - 4]; + } + else + { + tab = &DCT.next[(code >> 12)- 4]; + } + } + else if (code >= 1024) + { + tab = &DCT.tab0[(code >> 8) - 4]; + } + else if (code >= 512) + { + tab = &DCT.tab1[(code >> 6) - 8]; + } + + // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup + // that should use a single unrolled DCT table instead of five separate tables used + // here. 
Multiple conditional statements are very slow, while modern CPU data caches + // have lots of room to spare. + + else if (code >= 256) + { + tab = &DCT.tab2[(code >> 4) - 16]; + } + else if (code >= 128) + { + tab = &DCT.tab3[(code >> 3) - 16]; + } + else if (code >= 64) + { + tab = &DCT.tab4[(code >> 2) - 16]; + } + else if (code >= 32) + { + tab = &DCT.tab5[(code >> 1) - 16]; + } + else if (code >= 16) + { + tab = &DCT.tab6[code - 16]; + } + else + { + ipu_cmd.pos[4] = 0; + return true; + } + + DUMPBITS(tab->len); + + if (tab->run==64) /* end_of_block */ + { + *last = i; + ipu_cmd.pos[4] = 0; + return true; + } + + i += (tab->run == 65) ? GETBITS(6) : tab->run; + if (i >= 64) + { + *last = i; + ipu_cmd.pos[4] = 0; + return true; + } + [[fallthrough]]; + + case 1: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i; + ipu_cmd.pos[5] = 1; + return false; + } + + j = scan[i]; + + if (tab->run==65) /* escape */ + { + if (!decoder.mpeg1) + { + val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); + + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + int bit1 = SBITS(1); + val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; + val = (val ^ bit1) - bit1; + DUMPBITS(1); + } + + SATURATE(val); + dest[j] = val; + ipu_cmd.pos[5] = 0; + } + } + + ipu_cmd.pos[4] = 0; + return true; +} + +__ri static bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) +{ + if (!skip || ipu_cmd.pos[3]) + { + ipu_cmd.pos[3] = 0; + if (!GETWORD()) + { + ipu_cmd.pos[3] = 1; + return false; + } + + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + decoder.dc_dct_pred[0] += get_luma_dc_dct_diff(); + else + decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff(); + + decoder.DCTblock[0] 
= decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision); + } + + if (!get_intra_block()) + { + return false; + } + + IDCT_Copy(decoder.DCTblock, dest, stride); + + return true; +} + +__ri static bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) +{ + int last; + + if (!skip) + { + memzero_sse_a(decoder.DCTblock); + } + + if (!get_non_intra_block(&last)) + { + return false; + } + + IDCT_Add(last, decoder.DCTblock, dest, stride); + + return true; +} + +__fi static void finishmpeg2sliceIDEC() +{ + ipuRegs.ctrl.SCD = 0; + coded_block_pattern = decoder.coded_block_pattern; +} + +__ri static bool mpeg2sliceIDEC() +{ + u16 code; + + switch (ipu_cmd.pos[0]) + { + case 0: + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + + ipuRegs.top = 0; + ipuRegs.ctrl.ECD = 0; + [[fallthrough]]; + + case 1: + ipu_cmd.pos[0] = 1; + if (!BitstreamInit()) + { + return false; + } + [[fallthrough]]; + + case 2: + ipu_cmd.pos[0] = 2; + while (1) + { + // IPU0 isn't ready for data, so let's wait for it to be + if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[1] <= 2) + { + return false; + } + macroblock_8& mb8 = decoder.mb8; + macroblock_rgb16& rgb16 = decoder.rgb16; + macroblock_rgb32& rgb32 = decoder.rgb32; + + int DCT_offset, DCT_stride; + const MBAtab * mba; + + switch (ipu_cmd.pos[1]) + { + case 0: + decoder.macroblock_modes = GetMacroblockModes(); + + if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC + { + const int quantizer_scale_code = GETBITS(5); + if (decoder.q_scale_type) + decoder.quantizer_scale = non_linear_quantizer_scale[quantizer_scale_code]; + else + decoder.quantizer_scale = quantizer_scale_code << 1; + } + + decoder.coded_block_pattern = 0x3F;//all 6 blocks + memzero_sse_a(mb8); + memzero_sse_a(rgb32); + [[fallthrough]]; + + case 1: + ipu_cmd.pos[1] = 1; + + if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) + { + DCT_offset = 
decoder_stride; + DCT_stride = decoder_stride * 2; + } + else + { + DCT_offset = decoder_stride * 8; + DCT_stride = decoder_stride; + } + + switch (ipu_cmd.pos[2]) + { + case 0: + case 1: + if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1)) + { + ipu_cmd.pos[2] = 1; + return false; + } + [[fallthrough]]; + + case 2: + if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2)) + { + ipu_cmd.pos[2] = 2; + return false; + } + [[fallthrough]]; + + case 3: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3)) + { + ipu_cmd.pos[2] = 3; + return false; + } + [[fallthrough]]; + + case 4: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4)) + { + ipu_cmd.pos[2] = 4; + return false; + } + [[fallthrough]]; + + case 5: + if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5)) + { + ipu_cmd.pos[2] = 5; + return false; + } + [[fallthrough]]; + + case 6: + if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6)) + { + ipu_cmd.pos[2] = 6; + return false; + } + break; + + jNO_DEFAULT; + } + + // Send The MacroBlock via DmaIpuFrom + ipu_csc(mb8, rgb32, decoder.sgn); + + if (decoder.ofm == 0) + decoder.SetOutputTo(rgb32); + else + { + ipu_dither(rgb32, rgb16, decoder.dte); + decoder.SetOutputTo(rgb16); + } + [[fallthrough]]; + + case 2: + { + + pxAssert(decoder.ipu0_data > 0); + + uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + decoder.AdvanceIpuDataBy(read); + + if (decoder.ipu0_data != 0) + { + // IPU FIFO filled up -- Will have to finish transferring later. 
+ ipu_cmd.pos[1] = 2; + return false; + } + + mbaCount = 0; + if (read) + { + ipu_cmd.pos[1] = 3; + return false; + } + } + [[fallthrough]]; + + case 3: + while (1) + { + if (!GETWORD()) + { + ipu_cmd.pos[1] = 3; + return false; + } + + code = UBITS(16); + if (code >= 0x1000) + { + mba = MBA.mba5 + (UBITS(5) - 2); + break; + } + else if (code >= 0x0300) + { + mba = MBA.mba11 + (UBITS(11) - 24); + break; + } + else switch (UBITS(11)) + { + case 8: /* macroblock_escape */ + mbaCount += 33; + [[fallthrough]]; + + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS(11); + continue; + + default: /* end of slice/frame, or error? */ + { + goto finish_idec; + } + } + } + + DUMPBITS(mba->len); + mbaCount += mba->mba; + + if (mbaCount) + { + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + } + [[fallthrough]]; + + case 4: + if (!GETWORD()) + { + ipu_cmd.pos[1] = 4; + return false; + } + break; + + jNO_DEFAULT; + } + + ipu_cmd.pos[1] = 0; + ipu_cmd.pos[2] = 0; + } + +finish_idec: + finishmpeg2sliceIDEC(); + [[fallthrough]]; + + case 3: + { + u8 bit8; + u32 start_check; + if (!getBits8((u8*)&bit8, 0)) + { + ipu_cmd.pos[0] = 3; + return false; + } + + if (bit8 == 0) + { + g_BP.Align(); + do + { + if (!g_BP.FillBuffer(24)) + { + ipu_cmd.pos[0] = 3; + return false; + } + start_check = UBITS(24); + if (start_check != 0) + { + if (start_check == 1) + { + ipuRegs.ctrl.SCD = 1; + } + else + { + ipuRegs.ctrl.ECD = 1; + } + break; + } + DUMPBITS(8); + } while (1); + } + } + [[fallthrough]]; + + case 4: + if (!getBits32((u8*)&ipuRegs.top, 0)) + { + ipu_cmd.pos[0] = 4; + return false; + } + + ipuRegs.top = BigEndian(ipuRegs.top); + break; + + jNO_DEFAULT; + } + + return true; +} +/* mpeg2_slice: resumable decode of one macroblock. ipu_cmd.pos[0] selects the stage to (re-)enter (0 reset, 1 bitstream init, 2 block decode, 3 output transfer, 4 start-code check, 5 load ipuRegs.top) and ipu_cmd.pos[1] the block within stage 2. Every `return false` happens after the stage/block to resume from has been stored; `true` is returned once stage 5 has finished. */ +__fi static bool mpeg2_slice() +{ + int DCT_offset, DCT_stride; + + macroblock_8& mb8 = decoder.mb8; + macroblock_16& mb16 = decoder.mb16; + + switch (ipu_cmd.pos[0]) + { + case 0: /* stage 0: optionally reset the DC predictors, clear ECD and ipuRegs.top, zero both macroblock buffers */ + if (decoder.dcr) + { + decoder.dc_dct_pred[0] = + 
decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + } + + ipuRegs.ctrl.ECD = 0; + ipuRegs.top = 0; + memzero_sse_a(mb8); + memzero_sse_a(mb16); + [[fallthrough]]; + + case 1: /* stage 1: BitstreamInit() has to succeed before decoding; on failure the next call retries from here */ + if (!BitstreamInit()) + { + ipu_cmd.pos[0] = 1; + return false; + } + [[fallthrough]]; + + case 2: /* stage 2: decode the blocks of this macroblock into mb8/mb16 */ + ipu_cmd.pos[0] = 2; /* NOTE(review): because of this store the `ipu_cmd.pos[0] <= 3` term in the test below is always true */ + + // IPU0 isn't ready for data, so let's wait for it to be + if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[0] <= 3) + { + return false; + } + + if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) /* interlaced DCT: the lower Y blocks start one row down and rows are taken at twice the stride; otherwise they start eight rows down */ + { + DCT_offset = decoder_stride; + DCT_stride = decoder_stride * 2; + } + else + { + DCT_offset = decoder_stride * 8; + DCT_stride = decoder_stride; + } + + if (decoder.macroblock_modes & MACROBLOCK_INTRA) + { + switch(ipu_cmd.pos[1]) + { + case 0: + decoder.coded_block_pattern = 0x3F; /* intra: all six block bits set */ + [[fallthrough]]; + + case 1: + if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1)) /* last argument is true only when re-entering at this block -- presumably a resume flag, confirm against slice_intra_DCT */ + { + ipu_cmd.pos[1] = 1; + return false; + } + [[fallthrough]]; + + case 2: + if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) + { + ipu_cmd.pos[1] = 2; + return false; + } + [[fallthrough]]; + + case 3: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) + { + ipu_cmd.pos[1] = 3; + return false; + } + [[fallthrough]]; + + case 4: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) + { + ipu_cmd.pos[1] = 4; + return false; + } + [[fallthrough]]; + + case 5: + if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) + { + ipu_cmd.pos[1] = 5; + return false; + } + [[fallthrough]]; + + case 6: + if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6)) + { + ipu_cmd.pos[1] = 6; + return false; + } + break; + + jNO_DEFAULT; + } + + // Copy macroblock8 to macroblock16 - without sign extension. + // Manually inlined due to MSVC refusing to inline the SSE-optimized version. 
+ { + const u8 *s = (const u8*)&mb8; + u16 *d = (u16*)&mb16; + + //Y bias - 16 * 16 + //Cr bias - 8 * 8 + //Cb bias - 8 * 8 + + __m128i zeroreg = _mm_setzero_si128(); + + for (uint i = 0; i < (256+64+64) / 32; ++i) /* 384 source bytes (Y + Cb + Cr), 32 per iteration */ + { + //*d++ = *s++; + __m128i woot1 = _mm_load_si128((__m128i*)s); + __m128i woot2 = _mm_load_si128((__m128i*)s+1); + _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg)); /* interleaving with the zero register widens each byte to 16 bits with a zero high byte */ + _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg)); + _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg)); + _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg)); + s += 32; + d += 32; + } + } + } + else + { + if (decoder.macroblock_modes & MACROBLOCK_PATTERN) /* non-intra: only blocks flagged in the coded block pattern are decoded, directly into mb16 */ + { + switch(ipu_cmd.pos[1]) + { + case 0: + { + // Get coded block pattern + const CBPtab* tab; + u16 code = UBITS(16); + + if (code >= 0x2000) /* choose the 7-bit or the 9-bit lookup table from the next 16 bits */ + tab = CBP_7 + (UBITS(7) - 16); + else + tab = CBP_9 + UBITS(9); + + DUMPBITS(tab->len); + decoder.coded_block_pattern = tab->cbp; + } + [[fallthrough]]; + + case 1: + if (decoder.coded_block_pattern & 0x20) /* bits 0x20, 0x10, 0x08, 0x04 gate the four Y blocks; 0x2 and 0x1 gate Cb and Cr */ + { + if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1)) + { + ipu_cmd.pos[1] = 1; + return false; + } + } + [[fallthrough]]; + + case 2: + if (decoder.coded_block_pattern & 0x10) + { + if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) + { + ipu_cmd.pos[1] = 2; + return false; + } + } + [[fallthrough]]; + + case 3: + if (decoder.coded_block_pattern & 0x08) + { + if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) + { + ipu_cmd.pos[1] = 3; + return false; + } + } + [[fallthrough]]; + + case 4: + if (decoder.coded_block_pattern & 0x04) + { + if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) + { + ipu_cmd.pos[1] = 4; + return false; + } + } + [[fallthrough]]; + + case 5: + if (decoder.coded_block_pattern & 0x2) + { + if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) + { + 
ipu_cmd.pos[1] = 5; + return false; + } + } + [[fallthrough]]; + + case 6: + if (decoder.coded_block_pattern & 0x1) + { + if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6)) + { + ipu_cmd.pos[1] = 6; + return false; + } + } + break; + + jNO_DEFAULT; + } + } + } + + // Send The MacroBlock via DmaIpuFrom + ipuRegs.ctrl.SCD = 0; + coded_block_pattern = decoder.coded_block_pattern; + + decoder.SetOutputTo(mb16); + [[fallthrough]]; + + case 3: /* stage 3: push the selected output (mb16) into the IPU output FIFO */ + { + pxAssert(decoder.ipu0_data > 0); + + uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + decoder.AdvanceIpuDataBy(read); + + if (decoder.ipu0_data != 0) + { + // IPU FIFO filled up -- Will have to finish transferring later. + ipu_cmd.pos[0] = 3; + return false; + } + + mbaCount = 0; + if (read) /* something was written during this call: return false and continue at stage 4 on the next call */ + { + ipu_cmd.pos[0] = 4; + return false; + } + } + [[fallthrough]]; + + case 4: /* stage 4: fetch 8 bits (second argument 0 -- presumably "do not advance", confirm in getBits8); if they are zero, align and scan for a start code */ + { + u8 bit8; + u32 start_check; + if (!getBits8((u8*)&bit8, 0)) + { + ipu_cmd.pos[0] = 4; + return false; + } + + if (bit8 == 0) + { + g_BP.Align(); + do + { + if (!g_BP.FillBuffer(24)) + { + ipu_cmd.pos[0] = 4; + return false; + } + start_check = UBITS(24); + if (start_check != 0) + { + if (start_check == 1) /* 24-bit value 0x000001 sets SCD; any other non-zero value sets ECD */ + { + ipuRegs.ctrl.SCD = 1; + } + else + { + ipuRegs.ctrl.ECD = 1; + } + break; + } + DUMPBITS(8); + } while (1); + } + } + [[fallthrough]]; + + case 5: /* stage 5: load the next 32 bits into ipuRegs.top, then byte-swap them with BigEndian() */ + if (!getBits32((u8*)&ipuRegs.top, 0)) + { + ipu_cmd.pos[0] = 5; + return false; + } + + ipuRegs.top = BigEndian(ipuRegs.top); + break; + } + + return true; +} + + ////////////////////////////////////////////////////// // IPU Commands (exec on worker thread only) -static __fi bool ipuVDEC(u32 val) +__fi static bool ipuVDEC(u32 val) { static int count = 0; if (count++ > 5) { @@ -39,7 +1594,7 @@ static __fi bool ipuVDEC(u32 val) switch (ipu_cmd.pos[0]) { case 0: - if (!bitstream_init()) return false; + if (!BitstreamInit()) return false; switch ((val >> 26) & 3) { @@ -51,15 +1606,42 @@ static __fi bool ipuVDEC(u32 val) case 
1://Macroblock Type decoder.frame_pred_frame_dct = 1; decoder.coding_type = ipuRegs.ctrl.PCT > 0 ? ipuRegs.ctrl.PCT : 1; // Kaiketsu Zorro Mezase doesn't set a Picture type, seems happy with I - ipuRegs.cmd.DATA = get_macroblock_modes(); + ipuRegs.cmd.DATA = GetMacroblockModes(); break; case 2://Motion Code - ipuRegs.cmd.DATA = get_motion_delta(0); + { + const u16 code = UBITS(16); + if ((code & 0x8000)) + { + DUMPBITS(1); + ipuRegs.cmd.DATA = 0x00010000; + } + else + { + const MVtab* tab; + if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00)) + tab = MV_4 + UBITS(4); + else + tab = MV_10 + UBITS(10); + + const int delta = tab->delta + 1; + DUMPBITS(tab->len); + + const int sign = SBITS(1); + DUMPBITS(1); + + ipuRegs.cmd.DATA = (((delta ^ sign) - sign) | (tab->len << 16)); + } + } break; case 3://DMVector - ipuRegs.cmd.DATA = get_dmv(); + { + const DMVtab* tab = DMV_2 + UBITS(2); + DUMPBITS(tab->len); + ipuRegs.cmd.DATA = (tab->dmv | (tab->len << 16)); + } break; jNO_DEFAULT @@ -100,7 +1682,7 @@ static __fi bool ipuVDEC(u32 val) return false; } -static __ri bool ipuFDEC(u32 val) +__ri static bool ipuFDEC(u32 val) { if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false; @@ -185,7 +1767,7 @@ static bool ipuSETVQ(u32 val) } // IPU Transfers are split into 8Qwords so we need to send ALL the data -static __ri bool ipuCSC(tIPU_CMD_CSC csc) +__ri static bool ipuCSC(tIPU_CMD_CSC csc) { csc.log_from_YCbCr(); @@ -217,7 +1799,7 @@ static __ri bool ipuCSC(tIPU_CMD_CSC csc) return true; } -static __ri bool ipuPACK(tIPU_CMD_CSC csc) +__ri static bool ipuPACK(tIPU_CMD_CSC csc) { csc.log_from_RGB32(); @@ -254,7 +1836,7 @@ static __ri bool ipuPACK(tIPU_CMD_CSC csc) // CORE Functions (referenced from MPEG library) // -------------------------------------------------------------------------------------- -__fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) +__fi static void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) { int i; u8* p = (u8*)&rgb32; 
@@ -288,7 +1870,7 @@ __fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) } } -__fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) +__fi static void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) { const auto closest_index = [&](int i, int j) { u8 index = 0; diff --git a/pcsx2/IPU/IPU_MultiISA.h b/pcsx2/IPU/IPU_MultiISA.h index d671e9a6f0..b56e07ae5c 100644 --- a/pcsx2/IPU/IPU_MultiISA.h +++ b/pcsx2/IPU/IPU_MultiISA.h @@ -15,13 +15,163 @@ #pragma once +#include "IPU/IPU.h" +#include "IPU/mpeg2_vlc.h" #include "GS/MultiISA.h" -#include "mpeg2lib/Mpeg.h" -MULTI_ISA_DEF(void IPUWorker();) +#ifdef _MSC_VER /* MSVC only: header for the _byteswap_* calls behind BigEndian/BigEndian64 */ +#include <intrin.h> +#endif + +#ifdef _MSC_VER +#define BigEndian(in) _byteswap_ulong(in) +#else +#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap... +#endif + +#ifdef _MSC_VER +#define BigEndian64(in) _byteswap_uint64(in) +#else +#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap... +#endif + +struct macroblock_8{ /* planar 8-bit macroblock: 16x16 Y, then 8x8 Cb and 8x8 Cr */ + u8 Y[16][16]; //0 + u8 Cb[8][8]; //1 + u8 Cr[8][8]; //2 +}; + +struct macroblock_16{ /* same planes as macroblock_8, with signed 16-bit samples */ + s16 Y[16][16]; //0 + s16 Cb[8][8]; //1 + s16 Cr[8][8]; //2 +}; + +struct macroblock_rgb32{ /* 16x16 pixels, four 8-bit channels each */ + struct { + u8 r, g, b, a; + } c[16][16]; +}; + +struct rgb16_t{ /* 5:5:5:1 bit-field pixel */ + u16 r:5, g:5, b:5, a:1; +}; + +struct macroblock_rgb16{ + rgb16_t c[16][16]; +}; + +struct decoder_t { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* DCT coefficients - should be kept aligned ! 
*/ + s16 DCTblock[64]; + + u8 niq[64]; //non-intraquant matrix (sequence header) + u8 iq[64]; //intraquant matrix (sequence header) + + macroblock_8 mb8; + macroblock_16 mb16; + macroblock_rgb32 rgb32; + macroblock_rgb16 rgb16; + + uint ipu0_data; // amount of data in the output macroblock (in QWC) + uint ipu0_idx; + + int quantizer_scale; + + /* now non-slice-specific information */ + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int coding_type; + + /* picture coding extension stuff */ + + /* predictor for DC coefficients in intra blocks */ + s16 dc_dct_pred[3]; + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ + int concealment_motion_vectors; + /* bit to indicate which quantization table to use */ + int q_scale_type; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + // Pseudo Sign Offset + int sgn; + // Dither Enable + int dte; + // Output Format + int ofm; + // Macroblock type + int macroblock_modes; + // DC Reset + int dcr; + // Coded block pattern + int coded_block_pattern; + + /* stuff derived from bitstream */ + + /* the zigzag scan we're supposed to be using, true for alt, false for normal */ + bool scantype; + + int mpeg1; + + template< typename T > + void SetOutputTo( T& obj ) /* make obj the pending output: ipu0_idx becomes its byte offset from mb8 divided by 16 and ipu0_data its size divided by 16; the offset is assumed to be a multiple of 16 */ + { + uint mb_offset = ((uptr)&obj - (uptr)&mb8); + pxAssume( (mb_offset & 15) == 0 ); + ipu0_idx = mb_offset / 16; + ipu0_data = sizeof(obj)/16; + } + + u128* GetIpuDataPtr() /* address of the 16-byte unit at index ipu0_idx, counted from mb8 */ + { + return ((u128*)&mb8) + ipu0_idx; + } + + void AdvanceIpuDataBy(uint amt) /* consume amt 16-byte units: ipu0_idx moves forward and ipu0_data shrinks; asserts that amt does not exceed ipu0_data */ + { + pxAssertMsg(ipu0_data>=amt, "IPU FIFO Overflow on advance!" 
); + ipu0_idx += amt; + ipu0_data -= amt; + } +}; + +alignas(16) extern decoder_t decoder; +alignas(16) extern tIPU_BP g_BP; + +MULTI_ISA_DEF( + extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte); + + void IPUWorker(); +) // Quantization matrix extern rgb16_t g_ipu_vqclut[16]; //clut conversion table extern u16 g_ipu_thresh[2]; //thresholds for color conversions alignas(16) extern u8 g_ipu_indx4[16*16/2]; +alignas(16) extern const int non_linear_quantizer_scale[32]; +extern int coded_block_pattern; + +struct mpeg2_scan_pack /* two 64-entry scan tables; presumably the normal and alternate zigzag orders selected by decoder_t::scantype -- confirm at the definition */ +{ + u8 norm[64]; + u8 alt[64]; +}; + +alignas(16) extern const std::array<u8, 1024> g_idct_clip_lut; /* NOTE(review): element type and size must match the definition in IPU.cpp -- confirm */ +alignas(16) extern const mpeg2_scan_pack mpeg2_scan; diff --git a/pcsx2/IPU/IPUdither.cpp b/pcsx2/IPU/IPUdither.cpp index a2d1f07877..a7bf620590 100644 --- a/pcsx2/IPU/IPUdither.cpp +++ b/pcsx2/IPU/IPUdither.cpp @@ -16,12 +16,10 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" -#include "IPUdma.h" -#include "yuv2rgb.h" -#include "mpeg2lib/Mpeg.h" - -#include "GS/MultiISA.h" +#include "IPU/IPU.h" +#include "IPU/IPUdma.h" +#include "IPU/yuv2rgb.h" +#include "IPU/IPU_MultiISA.h" MULTI_ISA_UNSHARED_START diff --git a/pcsx2/IPU/IPUdma.cpp b/pcsx2/IPU/IPUdma.cpp index e7e9f83d48..132b65859c 100644 --- a/pcsx2/IPU/IPUdma.cpp +++ b/pcsx2/IPU/IPUdma.cpp @@ -15,9 +15,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" +#include "IPU/IPU.h" #include "IPU/IPUdma.h" -#include "mpeg2lib/Mpeg.h" +#include "IPU/IPU_MultiISA.h" IPUStatus IPU1Status; bool CommandExecuteQueued; diff --git a/pcsx2/IPU/mpeg2_vlc.h b/pcsx2/IPU/mpeg2_vlc.h new file mode 100644 index 0000000000..997c8d68d4 --- /dev/null +++ b/pcsx2/IPU/mpeg2_vlc.h @@ -0,0 +1,485 @@ +/* + * vlc.h + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * Modified by Florin for PCSX2 emu + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
+ * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* NOTE: While part of this header is originally from libmpeg2, which is GPL - licensed, + * it's not substantial and does not contain any functions, therefore can be argued + * not to be a derived work. See http://lkml.iu.edu/hypermail/linux/kernel/0301.1/0362.html + * The constants themselves can also be argued to be part of the MPEG-2 standard, whose + * patents expired worldwide in Feb 2020. 
+ */ + +#pragma once +#include <cstdint> + +#ifdef _MSC_VER +#define VLC_ALIGNED16 __declspec(align(16)) +#else +#define VLC_ALIGNED16 __attribute__((aligned(16))) +#endif + +enum macroblock_modes /* bit flags; OR-ed together in the MB_* tables below */ +{ + MACROBLOCK_INTRA = 1, + MACROBLOCK_PATTERN = 2, + MACROBLOCK_MOTION_BACKWARD = 4, + MACROBLOCK_MOTION_FORWARD = 8, + MACROBLOCK_QUANT = 16, + DCT_TYPE_INTERLACED = 32 +}; + +enum motion_type +{ + MOTION_TYPE_SHIFT = 6, + MOTION_TYPE_MASK = (3 * 64), + MOTION_TYPE_BASE = 64, + MC_FIELD = (1 * 64), + MC_FRAME = (2 * 64), + MC_16X8 = (2 * 64), + MC_DMV = (3 * 64) +}; + +/* picture structure */ +enum picture_structure +{ + TOP_FIELD = 1, + BOTTOM_FIELD = 2, + FRAME_PICTURE = 3 +}; + +/* picture coding type */ +enum picture_coding_type +{ + I_TYPE = 1, + P_TYPE = 2, + B_TYPE = 3, + D_TYPE = 4 +}; + +struct MBtab /* table entry: macroblock mode flags plus len (presumably the code length in bits, as for MVtab/CBPtab -- confirm at the lookup site) */ +{ + std::uint8_t modes; + std::uint8_t len; +}; + +struct MVtab /* table entry: motion delta plus len; the motion-code path in ipuVDEC passes len to DUMPBITS */ +{ + std::uint8_t delta; + std::uint8_t len; +}; + +struct DMVtab /* table entry: signed dmv value plus len */ +{ + std::int8_t dmv; + std::uint8_t len; +}; + +struct CBPtab /* table entry: coded block pattern plus len; mpeg2_slice passes len to DUMPBITS */ +{ + std::uint8_t cbp; + std::uint8_t len; +}; + +struct DCtab +{ + std::uint8_t size; + std::uint8_t len; +}; + +struct DCTtab +{ + std::uint8_t run; + std::uint8_t level; + std::uint8_t len; +}; + +struct MBAtab +{ + std::uint8_t mba; + std::uint8_t len; +}; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static constexpr MBtab MB_I[] = { + {INTRA | QUANT, 2}, {INTRA, 1}}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static constexpr VLC_ALIGNED16 MBtab MB_P[] = { + {INTRA | QUANT, 6}, {CODED | QUANT, 5}, {MC | CODED | QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | 
CODED, 1}, {MC | CODED, 1}}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD) /* parenthesised so the expansion stays a single operand wherever it is used */ + +static constexpr VLC_ALIGNED16 MBtab MB_B[] = { + {0, 0}, {INTRA | QUANT, 6}, + {BWD | CODED | QUANT, 6}, {FWD | CODED | QUANT, 6}, + {INTER | CODED | QUANT, 5}, {INTER | CODED | QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD | CODED, 4}, {FWD | CODED, 4}, {FWD | CODED, 4}, {FWD | CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, + {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static constexpr MVtab MV_4[] = { /* ipuVDEC's motion-code case indexes this with UBITS(4) */ + {3, 6}, {2, 4}, {1, 3}, {1, 3}, {0, 2}, {0, 2}, {0, 2}, {0, 2}}; + +static constexpr VLC_ALIGNED16 MVtab MV_10[] = { /* ipuVDEC's motion-code case indexes this with UBITS(10) */ + {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, + {0, 10}, {0, 10}, {0, 10}, {0, 10}, {15, 10}, {14, 10}, {13, 10}, {12, 10}, + {11, 10}, {10, 10}, {9, 9}, {9, 9}, {8, 9}, {8, 9}, {7, 9}, {7, 9}, + {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, + {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, + {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}}; + + +static constexpr DMVtab DMV_2[] = { + {0, 1}, 
{0, 1}, {1, 2}, {-1, 2}}; + + +static constexpr VLC_ALIGNED16 CBPtab CBP_7[] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}}; + +static constexpr VLC_ALIGNED16 CBPtab CBP_9[] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, 
{0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}}; + +struct MBAtabSet +{ + MBAtab mba5[30]; + MBAtab mba11[26 * 4]; +}; +static constexpr VLC_ALIGNED16 MBAtabSet MBA = { + {// mba5 + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}, + + {// mba11 + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {9, 8}, {9, 8}, {9, 8}, {9, 8}, + {9, 8}, {9, 8}, {9, 8}, {9, 8}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}}}; + +struct DCtabSet +{ + DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 + DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 + DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 + DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 
1111111111 +}; + +static constexpr VLC_ALIGNED16 DCtabSet DCtable = + { + // lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */ + {{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0}}, + + /* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */ + {{7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}}, + + /* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */ + {{0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0}}, + + /* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */ + {{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, + {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10, 10}, {11, 10}}, +}; + +struct DCTtabSet +{ + DCTtab first[12]; + DCTtab next[12]; + + DCTtab tab0[60]; + DCTtab tab0a[252]; + DCTtab tab1[8]; + DCTtab tab1a[8]; + + DCTtab tab2[16]; + DCTtab tab3[16]; + DCTtab tab4[16]; + DCTtab tab5[16]; + DCTtab tab6[16]; +}; + +static constexpr VLC_ALIGNED16 DCTtabSet DCT = + { + /* first[12]: Table B-14, DCT coefficients table zero, + * codes 0100 ... 1xxx (used for first (DC) coefficient) + */ + {{0, 2, 4}, {2, 1, 4}, {1, 1, 3}, {1, 1, 3}, + {0, 1, 1}, {0, 1, 1}, {0, 1, 1}, {0, 1, 1}, + {0, 1, 1}, {0, 1, 1}, {0, 1, 1}, {0, 1, 1}}, + + /* next[12]: Table B-14, DCT coefficients table zero, + * codes 0100 ... 
1xxx (used for all other coefficients) + */ + {{0, 2, 4}, {2, 1, 4}, {1, 1, 3}, {1, 1, 3}, + {64, 0, 2}, {64, 0, 2}, {64, 0, 2}, {64, 0, 2}, /* EOB */ + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}}, + + /* tab0[60]: Table B-14, DCT coefficients table zero, + * codes 000001xx ... 00111xxx + */ + {{65, 0, 6}, {65, 0, 6}, {65, 0, 6}, {65, 0, 6}, /* Escape */ + {2, 2, 7}, {2, 2, 7}, {9, 1, 7}, {9, 1, 7}, + {0, 4, 7}, {0, 4, 7}, {8, 1, 7}, {8, 1, 7}, + {7, 1, 6}, {7, 1, 6}, {7, 1, 6}, {7, 1, 6}, + {6, 1, 6}, {6, 1, 6}, {6, 1, 6}, {6, 1, 6}, + {1, 2, 6}, {1, 2, 6}, {1, 2, 6}, {1, 2, 6}, + {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, + {13, 1, 8}, {0, 6, 8}, {12, 1, 8}, {11, 1, 8}, + {3, 2, 8}, {1, 3, 8}, {0, 5, 8}, {10, 1, 8}, + {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, + {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, + {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, + {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}}, + + /* tab0a[252]: Table B-15, DCT coefficients table one, + * codes 000001xx ... 
11111111 + */ + {{65, 0, 6}, {65, 0, 6}, {65, 0, 6}, {65, 0, 6}, /* Escape */ + {7, 1, 7}, {7, 1, 7}, {8, 1, 7}, {8, 1, 7}, + {6, 1, 7}, {6, 1, 7}, {2, 2, 7}, {2, 2, 7}, + {0, 7, 6}, {0, 7, 6}, {0, 7, 6}, {0, 7, 6}, + {0, 6, 6}, {0, 6, 6}, {0, 6, 6}, {0, 6, 6}, + {4, 1, 6}, {4, 1, 6}, {4, 1, 6}, {4, 1, 6}, + {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, + {1, 5, 8}, {11, 1, 8}, {0, 11, 8}, {0, 10, 8}, + {13, 1, 8}, {12, 1, 8}, {3, 2, 8}, {1, 4, 8}, + {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, + {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, + {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, + {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, /* EOB */ + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + 
{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, + {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, + {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, + {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, + {9, 1, 7}, {9, 1, 7}, {1, 3, 7}, {1, 3, 7}, + {10, 1, 7}, {10, 1, 7}, {0, 8, 7}, {0, 8, 7}, + {0, 9, 7}, {0, 9, 7}, {0, 12, 8}, {0, 13, 8}, + {2, 3, 8}, {4, 2, 8}, {0, 14, 8}, {0, 15, 8}}, + + /* Table B-14, DCT coefficients table zero, + * codes 0000001000 ... 0000001111 + */ + {{16, 1, 10}, {5, 2, 10}, {0, 7, 10}, {2, 3, 10}, + {1, 4, 10}, {15, 1, 10}, {14, 1, 10}, {4, 2, 10}}, + + /* Table B-15, DCT coefficients table one, + * codes 000000100x ... 000000111x + */ + {{5, 2, 9}, {5, 2, 9}, {14, 1, 9}, {14, 1, 9}, + {2, 4, 10}, {16, 1, 10}, {15, 1, 9}, {15, 1, 9}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 000000010000 ... 000000011111 + */ + {{0, 11, 12}, {8, 2, 12}, {4, 3, 12}, {0, 10, 12}, + {2, 4, 12}, {7, 2, 12}, {21, 1, 12}, {20, 1, 12}, + {0, 9, 12}, {19, 1, 12}, {18, 1, 12}, {1, 5, 12}, + {3, 3, 12}, {0, 8, 12}, {6, 2, 12}, {17, 1, 12}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 0000000010000 ... 
0000000011111 + */ + {{10, 2, 13}, {9, 2, 13}, {5, 3, 13}, {3, 4, 13}, + {2, 5, 13}, {1, 7, 13}, {1, 6, 13}, {0, 15, 13}, + {0, 14, 13}, {0, 13, 13}, {0, 12, 13}, {26, 1, 13}, + {25, 1, 13}, {24, 1, 13}, {23, 1, 13}, {22, 1, 13}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 00000000010000 ... 00000000011111 + */ + {{0, 31, 14}, {0, 30, 14}, {0, 29, 14}, {0, 28, 14}, + {0, 27, 14}, {0, 26, 14}, {0, 25, 14}, {0, 24, 14}, + {0, 23, 14}, {0, 22, 14}, {0, 21, 14}, {0, 20, 14}, + {0, 19, 14}, {0, 18, 14}, {0, 17, 14}, {0, 16, 14}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 000000000010000 ... 000000000011111 + */ + {{0, 40, 15}, {0, 39, 15}, {0, 38, 15}, {0, 37, 15}, + {0, 36, 15}, {0, 35, 15}, {0, 34, 15}, {0, 33, 15}, + {0, 32, 15}, {1, 14, 15}, {1, 13, 15}, {1, 12, 15}, + {1, 11, 15}, {1, 10, 15}, {1, 9, 15}, {1, 8, 15}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 0000000000010000 ... 0000000000011111 + */ + {{1, 18, 16}, {1, 17, 16}, {1, 16, 16}, {1, 15, 16}, + {6, 3, 16}, {16, 2, 16}, {15, 2, 16}, {14, 2, 16}, + {13, 2, 16}, {12, 2, 16}, {11, 2, 16}, {31, 1, 16}, + {30, 1, 16}, {29, 1, 16}, {28, 1, 16}, {27, 1, 16}} + +}; + +#undef VLC_ALIGNED16 \ No newline at end of file diff --git a/pcsx2/IPU/mpeg2lib/Idct.cpp b/pcsx2/IPU/mpeg2lib/Idct.cpp deleted file mode 100644 index dd1684b989..0000000000 --- a/pcsx2/IPU/mpeg2lib/Idct.cpp +++ /dev/null @@ -1,271 +0,0 @@ -/* - * idct.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// [TODO] : There are modern SSE versions of idct (idct_mmx.c) in the mpeg2 libs that we -// should probably upgrade to. They use their own raw-style intrinsics and not the intel -// compiler-integrated ones. - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "IPU/IPU.h" -#include "Mpeg.h" - -#include - -#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ -#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ -#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ -#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ -#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ -#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ - -/* - * In legal streams, the IDCT output should be between -384 and +384. - * In corrupted streams, it is possible to force the IDCT output to go - * to +-3826 - this is the worst case for a column IDCT where the - * column inputs are 16-bit values. 
- */ -alignas(16) extern const std::array g_idct_clip_lut; - -#define CLIP(i) ((g_idct_clip_lut.data()+384)[(i)]) - -MULTI_ISA_UNSHARED_START - -static __fi void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1) -{ -#if 0 - t0 = w0*d0 + w1*d1; - t1 = w0*d1 - w1*d0; -#else - int tmp = w0 * (d0 + d1); - t0 = tmp + (w1 - w0) * d1; - t1 = tmp - (w1 + w0) * d0; -#endif -} - -static __fi void idct_row (s16 * const block) -{ - int d0, d1, d2, d3; - int a0, a1, a2, a3, b0, b1, b2, b3; - int t0, t1, t2, t3; - - /* shortcut */ - if (!(block[1] | ((s32 *)block)[1] | ((s32 *)block)[2] | - ((s32 *)block)[3])) { - u32 tmp = (u16) (block[0] << 3); - tmp |= tmp << 16; - ((s32 *)block)[0] = tmp; - ((s32 *)block)[1] = tmp; - ((s32 *)block)[2] = tmp; - ((s32 *)block)[3] = tmp; - return; - } - - d0 = (block[0] << 11) + 128; - d1 = block[1]; - d2 = block[2] << 11; - d3 = block[3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[4]; - d1 = block[5]; - d2 = block[6]; - d3 = block[7]; - BUTTERFLY (t0, t1, W7, W1, d3, d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 -= t2; - t1 -= t3; - b1 = ((t0 + t1) * 181) >> 8; - b2 = ((t0 - t1) * 181) >> 8; - - block[0] = (a0 + b0) >> 8; - block[1] = (a1 + b1) >> 8; - block[2] = (a2 + b2) >> 8; - block[3] = (a3 + b3) >> 8; - block[4] = (a3 - b3) >> 8; - block[5] = (a2 - b2) >> 8; - block[6] = (a1 - b1) >> 8; - block[7] = (a0 - b0) >> 8; -} - -static __fi void idct_col (s16 * const block) -{ - int d0, d1, d2, d3; - int a0, a1, a2, a3, b0, b1, b2, b3; - int t0, t1, t2, t3; - - d0 = (block[8*0] << 11) + 65536; - d1 = block[8*1]; - d2 = block[8*2] << 11; - d3 = block[8*3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[8*4]; - d1 = block[8*5]; - d2 = block[8*6]; - d3 = block[8*7]; - BUTTERFLY (t0, t1, W7, W1, d3, 
d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 = (t0 - t2) >> 8; - t1 = (t1 - t3) >> 8; - b1 = (t0 + t1) * 181; - b2 = (t0 - t1) * 181; - - block[8*0] = (a0 + b0) >> 17; - block[8*1] = (a1 + b1) >> 17; - block[8*2] = (a2 + b2) >> 17; - block[8*3] = (a3 + b3) >> 17; - block[8*4] = (a3 - b3) >> 17; - block[8*5] = (a2 - b2) >> 17; - block[8*6] = (a1 - b1) >> 17; - block[8*7] = (a0 - b0) >> 17; -} - -__ri void mpeg2_idct_copy(s16 * block, u8 * dest, const int stride) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - - __m128 zero = _mm_setzero_ps(); - do { - dest[0] = CLIP (block[0]); - dest[1] = CLIP (block[1]); - dest[2] = CLIP (block[2]); - dest[3] = CLIP (block[3]); - dest[4] = CLIP (block[4]); - dest[5] = CLIP (block[5]); - dest[6] = CLIP (block[6]); - dest[7] = CLIP (block[7]); - - _mm_store_ps((float*)block, zero); - - dest += stride; - block += 8; - } while (--i); -} - - -// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]). -__ri void mpeg2_idct_add (const int last, s16 * block, s16 * dest, const int stride) -{ - // on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0). 
- - if (last != 129 || (block[0] & 7) == 4) - { - int i; - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - - __m128 zero = _mm_setzero_ps(); - do { - _mm_store_ps((float*)dest, _mm_load_ps((float*)block)); - _mm_store_ps((float*)block, zero); - - dest += stride; - block += 8; - } while (--i); - - } - else - { - s16 DC = ((int)block[0] + 4) >> 3; - s16 dcf[2] = { DC, DC }; - block[0] = block[63] = 0; - - __m128 dc128 = _mm_set_ps1(*(float*)dcf); - - for(int i=0; i<8; ++i) - _mm_store_ps((float*)(dest+(stride*i)), dc128); - } -} - -MULTI_ISA_UNSHARED_END - -#if MULTI_ISA_COMPILE_ONCE - -static constexpr std::array make_clip_lut() -{ - std::array lut = {}; - for (int i = -384; i < 640; i++) - lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); - return lut; -} - -static constexpr mpeg2_scan_pack make_scan_pack() -{ - constexpr u8 mpeg2_scan_norm[64] = { - /* Zig-Zag scan pattern */ - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 - }; - - constexpr u8 mpeg2_scan_alt[64] = { - /* Alternate scan pattern */ - 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, - 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, - 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, - 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 - }; - - mpeg2_scan_pack pack = {}; - - for (int i = 0; i < 64; i++) { - int j = mpeg2_scan_norm[i]; - pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - j = mpeg2_scan_alt[i]; - pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - } - - return pack; -} - -alignas(16) constexpr std::array g_idct_clip_lut = make_clip_lut(); -alignas(16) constexpr mpeg2_scan_pack mpeg2_scan = make_scan_pack(); - -#endif diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp 
b/pcsx2/IPU/mpeg2lib/Mpeg.cpp deleted file mode 100644 index 664f69c7a0..0000000000 --- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp +++ /dev/null @@ -1,1285 +0,0 @@ -/* - * Mpeg.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// [Air] Note: many functions in this module are large and only used once, so they -// have been forced to inline since it won't bloat the program and gets rid of -// some call overhead. - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "IPU/IPU.h" -#include "Mpeg.h" -#include "Vlc.h" - -#include "GS/MultiISA.h" - -#include "common/MemsetFast.inl" - -#if MULTI_ISA_COMPILE_ONCE - -const int non_linear_quantizer_scale [] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 -}; - -#endif - -MULTI_ISA_UNSHARED_START - -/* Bitstream and buffer needs to be reallocated in order for successful - reading of the old data. 
Here the old data stored in the 2nd slot - of the internal buffer is copied to 1st slot, and the new data read - into 1st slot is copied to the 2nd slot. Which will later be copied - back to the 1st slot when 128bits have been read. -*/ -const DCTtab * tab; -int mbaCount = 0; - -int bitstream_init () -{ - return g_BP.FillBuffer(32); -} - -int get_macroblock_modes() -{ - int macroblock_modes; - const MBtab * tab; - - switch (decoder.coding_type) - { - case I_TYPE: - macroblock_modes = UBITS(2); - - if (macroblock_modes == 0) return 0; // error - - tab = MB_I + (macroblock_modes >> 1); - DUMPBITS(tab->len); - macroblock_modes = tab->modes; - - if ((!(decoder.frame_pred_frame_dct)) && - (decoder.picture_structure == FRAME_PICTURE)) - { - macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; - } - return macroblock_modes; - - case P_TYPE: - macroblock_modes = UBITS(6); - - if (macroblock_modes == 0) return 0; // error - - tab = MB_P + (macroblock_modes >> 1); - DUMPBITS(tab->len); - macroblock_modes = tab->modes; - - if (decoder.picture_structure != FRAME_PICTURE) - { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - { - macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; - } - - return macroblock_modes; - } - else if (decoder.frame_pred_frame_dct) - { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - - return macroblock_modes; - } - else - { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - { - macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; - } - - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) - { - macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; - } - - return macroblock_modes; - } - - case B_TYPE: - macroblock_modes = UBITS(6); - - if (macroblock_modes == 0) return 0; // error - - tab = MB_B + macroblock_modes; - DUMPBITS(tab->len); - macroblock_modes = tab->modes; - - if (decoder.picture_structure != FRAME_PICTURE) - { - if (!(macroblock_modes & MACROBLOCK_INTRA)) - { - macroblock_modes |= 
GETBITS(2) * MOTION_TYPE_BASE; - } - return (macroblock_modes | (tab->len << 16)); - } - else if (decoder.frame_pred_frame_dct) - { - /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME; - return (macroblock_modes | (tab->len << 16)); - } - else - { - if (macroblock_modes & MACROBLOCK_INTRA) goto intra; - - macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; - - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) - { -intra: - macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; - } - return (macroblock_modes | (tab->len << 16)); - } - - case D_TYPE: - macroblock_modes = GETBITS(1); - //I suspect (as this is actually a 2 bit command) that this should be getbits(2) - //additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction) - DevCon.Warning(" Rare MPEG command! "); - if (macroblock_modes == 0) return 0; // error - return (MACROBLOCK_INTRA | (1 << 16)); - - default: - return 0; - } -} - -static __fi int get_quantizer_scale() -{ - int quantizer_scale_code; - - quantizer_scale_code = GETBITS(5); - - if (decoder.q_scale_type) - return non_linear_quantizer_scale [quantizer_scale_code]; - else - return quantizer_scale_code << 1; -} - -static __fi int get_coded_block_pattern() -{ - const CBPtab * tab; - u16 code = UBITS(16); - - if (code >= 0x2000) - tab = CBP_7 + (UBITS(7) - 16); - else - tab = CBP_9 + UBITS(9); - - DUMPBITS(tab->len); - return tab->cbp; -} - -int __fi get_motion_delta(const int f_code) -{ - int delta; - int sign; - const MVtab * tab; - u16 code = UBITS(16); - - if ((code & 0x8000)) - { - DUMPBITS(1); - return 0x00010000; - } - else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00)) - { - tab = MV_4 + UBITS(4); - } - else - { - tab = MV_10 + UBITS(10); - } - - delta = tab->delta + 1; - DUMPBITS(tab->len); - - sign = SBITS(1); - DUMPBITS(1); - - return (((delta ^ sign) - sign) | (tab->len << 16)); -} - -int __fi get_dmv() -{ - const DMVtab* tab = DMV_2 + UBITS(2); 
- DUMPBITS(tab->len); - return (tab->dmv | (tab->len << 16)); -} - -int get_macroblock_address_increment() -{ - const MBAtab *mba; - - u16 code = UBITS(16); - - if (code >= 4096) - mba = MBA.mba5 + (UBITS(5) - 2); - else if (code >= 768) - mba = MBA.mba11 + (UBITS(11) - 24); - else switch (UBITS(11)) - { - case 8: /* macroblock_escape */ - DUMPBITS(11); - return 0xb0023; - - case 15: /* macroblock_stuffing (MPEG1 only) */ - if (decoder.mpeg1) - { - DUMPBITS(11); - return 0xb0022; - } - [[fallthrough]]; - - default: - return 0;//error - } - - DUMPBITS(mba->len); - - return ((mba->mba + 1) | (mba->len << 16)); -} - -static __fi int get_luma_dc_dct_diff() -{ - int size; - int dc_diff; - u16 code = UBITS(5); - - if (code < 31) - { - size = DCtable.lum0[code].size; - DUMPBITS(DCtable.lum0[code].len); - - // 5 bits max - } - else - { - code = UBITS(9) - 0x1f0; - size = DCtable.lum1[code].size; - DUMPBITS(DCtable.lum1[code].len); - - // 9 bits max - } - - if (size==0) - dc_diff = 0; - else - { - dc_diff = GETBITS(size); - - // 6 for tab0 and 11 for tab1 - if ((dc_diff & (1<<(size-1)))==0) - dc_diff-= (1< 4095) - val = (val >> 31) ^ 2047; -} - -static bool get_intra_block() -{ - const u8 * scan = decoder.scantype ? 
mpeg2_scan.alt : mpeg2_scan.norm; - const u8 (&quant_matrix)[64] = decoder.iq; - int quantizer_scale = decoder.quantizer_scale; - s16 * dest = decoder.DCTblock; - u16 code; - - /* decode AC coefficients */ - for (int i=1 + ipu_cmd.pos[4]; ; i++) - { - switch (ipu_cmd.pos[5]) - { - case 0: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i - 1; - return false; - } - - code = UBITS(16); - - if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1)) - { - tab = &DCT.next[(code >> 12) - 4]; - } - else if (code >= 1024) - { - if (decoder.intra_vlc_format && !decoder.mpeg1) - { - tab = &DCT.tab0a[(code >> 8) - 4]; - } - else - { - tab = &DCT.tab0[(code >> 8) - 4]; - } - } - else if (code >= 512) - { - if (decoder.intra_vlc_format && !decoder.mpeg1) - { - tab = &DCT.tab1a[(code >> 6) - 8]; - } - else - { - tab = &DCT.tab1[(code >> 6) - 8]; - } - } - - // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup - // that should use a single unrolled DCT table instead of five separate tables used - // here. Multiple conditional statements are very slow, while modern CPU data caches - // have lots of room to spare. - - else if (code >= 256) - { - tab = &DCT.tab2[(code >> 4) - 16]; - } - else if (code >= 128) - { - tab = &DCT.tab3[(code >> 3) - 16]; - } - else if (code >= 64) - { - tab = &DCT.tab4[(code >> 2) - 16]; - } - else if (code >= 32) - { - tab = &DCT.tab5[(code >> 1) - 16]; - } - else if (code >= 16) - { - tab = &DCT.tab6[code - 16]; - } - else - { - ipu_cmd.pos[4] = 0; - return true; - } - - DUMPBITS(tab->len); - - if (tab->run==64) /* end_of_block */ - { - ipu_cmd.pos[4] = 0; - return true; - } - - i += (tab->run == 65) ? 
GETBITS(6) : tab->run; - if (i >= 64) - { - ipu_cmd.pos[4] = 0; - return true; - } - [[fallthrough]]; - - case 1: - { - if (!GETWORD()) - { - ipu_cmd.pos[4] = i - 1; - ipu_cmd.pos[5] = 1; - return false; - } - - uint j = scan[i]; - int val; - - if (tab->run==65) /* escape */ - { - if(!decoder.mpeg1) - { - val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; - DUMPBITS(12); - } - else - { - val = SBITS(8); - DUMPBITS(8); - - if (!(val & 0x7f)) - { - val = GETBITS(8) + 2 * val; - } - - val = (val * quantizer_scale * quant_matrix[i]) >> 4; - val = (val + ~ (((s32)val) >> 31)) | 1; - } - } - else - { - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; - if(decoder.mpeg1) - { - /* oddification */ - val = (val - 1) | 1; - } - - /* if (bitstream_get (1)) val = -val; */ - int bit1 = SBITS(1); - val = (val ^ bit1) - bit1; - DUMPBITS(1); - } - - SATURATE(val); - dest[j] = val; - ipu_cmd.pos[5] = 0; - } - } - } - - ipu_cmd.pos[4] = 0; - return true; -} - -static bool get_non_intra_block(int * last) -{ - int i; - int j; - int val; - const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm; - const u8 (&quant_matrix)[64] = decoder.niq; - int quantizer_scale = decoder.quantizer_scale; - s16 * dest = decoder.DCTblock; - u16 code; - - /* decode AC coefficients */ - for (i= ipu_cmd.pos[4] ; ; i++) - { - switch (ipu_cmd.pos[5]) - { - case 0: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i; - return false; - } - - code = UBITS(16); - - if (code >= 16384) - { - if (i==0) - { - tab = &DCT.first[(code >> 12) - 4]; - } - else - { - tab = &DCT.next[(code >> 12)- 4]; - } - } - else if (code >= 1024) - { - tab = &DCT.tab0[(code >> 8) - 4]; - } - else if (code >= 512) - { - tab = &DCT.tab1[(code >> 6) - 8]; - } - - // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup - // that should use a single unrolled DCT table instead of five separate tables used - // here. 
Multiple conditional statements are very slow, while modern CPU data caches - // have lots of room to spare. - - else if (code >= 256) - { - tab = &DCT.tab2[(code >> 4) - 16]; - } - else if (code >= 128) - { - tab = &DCT.tab3[(code >> 3) - 16]; - } - else if (code >= 64) - { - tab = &DCT.tab4[(code >> 2) - 16]; - } - else if (code >= 32) - { - tab = &DCT.tab5[(code >> 1) - 16]; - } - else if (code >= 16) - { - tab = &DCT.tab6[code - 16]; - } - else - { - ipu_cmd.pos[4] = 0; - return true; - } - - DUMPBITS(tab->len); - - if (tab->run==64) /* end_of_block */ - { - *last = i; - ipu_cmd.pos[4] = 0; - return true; - } - - i += (tab->run == 65) ? GETBITS(6) : tab->run; - if (i >= 64) - { - *last = i; - ipu_cmd.pos[4] = 0; - return true; - } - [[fallthrough]]; - - case 1: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i; - ipu_cmd.pos[5] = 1; - return false; - } - - j = scan[i]; - - if (tab->run==65) /* escape */ - { - if (!decoder.mpeg1) - { - val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5; - DUMPBITS(12); - } - else - { - val = SBITS(8); - DUMPBITS(8); - - if (!(val & 0x7f)) - { - val = GETBITS(8) + 2 * val; - } - - val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32; - val = (val + ~ (((s32)val) >> 31)) | 1; - } - } - else - { - int bit1 = SBITS(1); - val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; - val = (val ^ bit1) - bit1; - DUMPBITS(1); - } - - SATURATE(val); - dest[j] = val; - ipu_cmd.pos[5] = 0; - } - } - - ipu_cmd.pos[4] = 0; - return true; -} - -static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) -{ - if (!skip || ipu_cmd.pos[3]) - { - ipu_cmd.pos[3] = 0; - if (!GETWORD()) - { - ipu_cmd.pos[3] = 1; - return false; - } - - /* Get the intra DC coefficient and inverse quantize it */ - if (cc == 0) - decoder.dc_dct_pred[0] += get_luma_dc_dct_diff(); - else - decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff(); - - decoder.DCTblock[0] 
= decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision); - } - - if (!get_intra_block()) - { - return false; - } - - mpeg2_idct_copy(decoder.DCTblock, dest, stride); - - return true; -} - -static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) -{ - int last; - - if (!skip) - { - memzero_sse_a(decoder.DCTblock); - } - - if (!get_non_intra_block(&last)) - { - return false; - } - - mpeg2_idct_add(last, decoder.DCTblock, dest, stride); - - return true; -} - -void __fi finishmpeg2sliceIDEC() -{ - ipuRegs.ctrl.SCD = 0; - coded_block_pattern = decoder.coded_block_pattern; -} - -__fi bool mpeg2sliceIDEC() -{ - u16 code; - - switch (ipu_cmd.pos[0]) - { - case 0: - decoder.dc_dct_pred[0] = - decoder.dc_dct_pred[1] = - decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; - - ipuRegs.top = 0; - ipuRegs.ctrl.ECD = 0; - [[fallthrough]]; - - case 1: - ipu_cmd.pos[0] = 1; - if (!bitstream_init()) - { - return false; - } - [[fallthrough]]; - - case 2: - ipu_cmd.pos[0] = 2; - while (1) - { - // IPU0 isn't ready for data, so let's wait for it to be - if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[1] <= 2) - { - return false; - } - macroblock_8& mb8 = decoder.mb8; - macroblock_rgb16& rgb16 = decoder.rgb16; - macroblock_rgb32& rgb32 = decoder.rgb32; - - int DCT_offset, DCT_stride; - const MBAtab * mba; - - switch (ipu_cmd.pos[1]) - { - case 0: - decoder.macroblock_modes = get_macroblock_modes(); - - if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC - { - decoder.quantizer_scale = get_quantizer_scale(); - } - - decoder.coded_block_pattern = 0x3F;//all 6 blocks - memzero_sse_a(mb8); - memzero_sse_a(rgb32); - [[fallthrough]]; - - case 1: - ipu_cmd.pos[1] = 1; - - if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) - { - DCT_offset = decoder_stride; - DCT_stride = decoder_stride * 2; - } - else - { - DCT_offset = decoder_stride * 8; - DCT_stride = decoder_stride; - } - - switch (ipu_cmd.pos[2]) - { - 
case 0: - case 1: - if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1)) - { - ipu_cmd.pos[2] = 1; - return false; - } - [[fallthrough]]; - - case 2: - if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2)) - { - ipu_cmd.pos[2] = 2; - return false; - } - [[fallthrough]]; - - case 3: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3)) - { - ipu_cmd.pos[2] = 3; - return false; - } - [[fallthrough]]; - - case 4: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4)) - { - ipu_cmd.pos[2] = 4; - return false; - } - [[fallthrough]]; - - case 5: - if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5)) - { - ipu_cmd.pos[2] = 5; - return false; - } - [[fallthrough]]; - - case 6: - if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6)) - { - ipu_cmd.pos[2] = 6; - return false; - } - break; - - jNO_DEFAULT; - } - - // Send The MacroBlock via DmaIpuFrom - ipu_csc(mb8, rgb32, decoder.sgn); - - if (decoder.ofm == 0) - decoder.SetOutputTo(rgb32); - else - { - ipu_dither(rgb32, rgb16, decoder.dte); - decoder.SetOutputTo(rgb16); - } - [[fallthrough]]; - - case 2: - { - - pxAssert(decoder.ipu0_data > 0); - - uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); - decoder.AdvanceIpuDataBy(read); - - if (decoder.ipu0_data != 0) - { - // IPU FIFO filled up -- Will have to finish transferring later. 
- ipu_cmd.pos[1] = 2; - return false; - } - - mbaCount = 0; - if (read) - { - ipu_cmd.pos[1] = 3; - return false; - } - } - [[fallthrough]]; - - case 3: - while (1) - { - if (!GETWORD()) - { - ipu_cmd.pos[1] = 3; - return false; - } - - code = UBITS(16); - if (code >= 0x1000) - { - mba = MBA.mba5 + (UBITS(5) - 2); - break; - } - else if (code >= 0x0300) - { - mba = MBA.mba11 + (UBITS(11) - 24); - break; - } - else switch (UBITS(11)) - { - case 8: /* macroblock_escape */ - mbaCount += 33; - [[fallthrough]]; - - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS(11); - continue; - - default: /* end of slice/frame, or error? */ - { - goto finish_idec; - } - } - } - - DUMPBITS(mba->len); - mbaCount += mba->mba; - - if (mbaCount) - { - decoder.dc_dct_pred[0] = - decoder.dc_dct_pred[1] = - decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; - } - [[fallthrough]]; - - case 4: - if (!GETWORD()) - { - ipu_cmd.pos[1] = 4; - return false; - } - break; - - jNO_DEFAULT; - } - - ipu_cmd.pos[1] = 0; - ipu_cmd.pos[2] = 0; - } - -finish_idec: - finishmpeg2sliceIDEC(); - [[fallthrough]]; - - case 3: - { - u8 bit8; - u32 start_check; - if (!getBits8((u8*)&bit8, 0)) - { - ipu_cmd.pos[0] = 3; - return false; - } - - if (bit8 == 0) - { - g_BP.Align(); - do - { - if (!g_BP.FillBuffer(24)) - { - ipu_cmd.pos[0] = 3; - return false; - } - start_check = UBITS(24); - if (start_check != 0) - { - if (start_check == 1) - { - ipuRegs.ctrl.SCD = 1; - } - else - { - ipuRegs.ctrl.ECD = 1; - } - break; - } - DUMPBITS(8); - } while (1); - } - } - [[fallthrough]]; - - case 4: - if (!getBits32((u8*)&ipuRegs.top, 0)) - { - ipu_cmd.pos[0] = 4; - return false; - } - - ipuRegs.top = BigEndian(ipuRegs.top); - break; - - jNO_DEFAULT; - } - - return true; -} - -__fi bool mpeg2_slice() -{ - int DCT_offset, DCT_stride; - - macroblock_8& mb8 = decoder.mb8; - macroblock_16& mb16 = decoder.mb16; - - switch (ipu_cmd.pos[0]) - { - case 0: - if (decoder.dcr) - { - decoder.dc_dct_pred[0] = - 
decoder.dc_dct_pred[1] = - decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; - } - - ipuRegs.ctrl.ECD = 0; - ipuRegs.top = 0; - memzero_sse_a(mb8); - memzero_sse_a(mb16); - [[fallthrough]]; - - case 1: - if (!bitstream_init()) - { - ipu_cmd.pos[0] = 1; - return false; - } - [[fallthrough]]; - - case 2: - ipu_cmd.pos[0] = 2; - - // IPU0 isn't ready for data, so let's wait for it to be - if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[0] <= 3) - { - return false; - } - - if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) - { - DCT_offset = decoder_stride; - DCT_stride = decoder_stride * 2; - } - else - { - DCT_offset = decoder_stride * 8; - DCT_stride = decoder_stride; - } - - if (decoder.macroblock_modes & MACROBLOCK_INTRA) - { - switch(ipu_cmd.pos[1]) - { - case 0: - decoder.coded_block_pattern = 0x3F; - [[fallthrough]]; - - case 1: - if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1)) - { - ipu_cmd.pos[1] = 1; - return false; - } - [[fallthrough]]; - - case 2: - if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) - { - ipu_cmd.pos[1] = 2; - return false; - } - [[fallthrough]]; - - case 3: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) - { - ipu_cmd.pos[1] = 3; - return false; - } - [[fallthrough]]; - - case 4: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) - { - ipu_cmd.pos[1] = 4; - return false; - } - [[fallthrough]]; - - case 5: - if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) - { - ipu_cmd.pos[1] = 5; - return false; - } - [[fallthrough]]; - - case 6: - if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6)) - { - ipu_cmd.pos[1] = 6; - return false; - } - break; - - jNO_DEFAULT; - } - - // Copy macroblock8 to macroblock16 - without sign extension. - // Manually inlined due to MSVC refusing to inline the SSE-optimized version. 
- { - const u8 *s = (const u8*)&mb8; - u16 *d = (u16*)&mb16; - - //Y bias - 16 * 16 - //Cr bias - 8 * 8 - //Cb bias - 8 * 8 - - __m128i zeroreg = _mm_setzero_si128(); - - for (uint i = 0; i < (256+64+64) / 32; ++i) - { - //*d++ = *s++; - __m128i woot1 = _mm_load_si128((__m128i*)s); - __m128i woot2 = _mm_load_si128((__m128i*)s+1); - _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg)); - _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg)); - _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg)); - _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg)); - s += 32; - d += 32; - } - } - } - else - { - if (decoder.macroblock_modes & MACROBLOCK_PATTERN) - { - switch(ipu_cmd.pos[1]) - { - case 0: - decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits - [[fallthrough]]; - - case 1: - if (decoder.coded_block_pattern & 0x20) - { - if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1)) - { - ipu_cmd.pos[1] = 1; - return false; - } - } - [[fallthrough]]; - - case 2: - if (decoder.coded_block_pattern & 0x10) - { - if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) - { - ipu_cmd.pos[1] = 2; - return false; - } - } - [[fallthrough]]; - - case 3: - if (decoder.coded_block_pattern & 0x08) - { - if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) - { - ipu_cmd.pos[1] = 3; - return false; - } - } - [[fallthrough]]; - - case 4: - if (decoder.coded_block_pattern & 0x04) - { - if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) - { - ipu_cmd.pos[1] = 4; - return false; - } - } - [[fallthrough]]; - - case 5: - if (decoder.coded_block_pattern & 0x2) - { - if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) - { - ipu_cmd.pos[1] = 5; - return false; - } - } - [[fallthrough]]; - - case 6: - if (decoder.coded_block_pattern & 0x1) - { - if (!slice_non_intra_DCT((s16*)mb16.Cr, 
decoder_stride >> 1, ipu_cmd.pos[1] == 6)) - { - ipu_cmd.pos[1] = 6; - return false; - } - } - break; - - jNO_DEFAULT; - } - } - } - - // Send The MacroBlock via DmaIpuFrom - ipuRegs.ctrl.SCD = 0; - coded_block_pattern = decoder.coded_block_pattern; - - decoder.SetOutputTo(mb16); - [[fallthrough]]; - - case 3: - { - pxAssert(decoder.ipu0_data > 0); - - uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); - decoder.AdvanceIpuDataBy(read); - - if (decoder.ipu0_data != 0) - { - // IPU FIFO filled up -- Will have to finish transferring later. - ipu_cmd.pos[0] = 3; - return false; - } - - mbaCount = 0; - if (read) - { - ipu_cmd.pos[0] = 4; - return false; - } - } - [[fallthrough]]; - - case 4: - { - u8 bit8; - u32 start_check; - if (!getBits8((u8*)&bit8, 0)) - { - ipu_cmd.pos[0] = 4; - return false; - } - - if (bit8 == 0) - { - g_BP.Align(); - do - { - if (!g_BP.FillBuffer(24)) - { - ipu_cmd.pos[0] = 4; - return false; - } - start_check = UBITS(24); - if (start_check != 0) - { - if (start_check == 1) - { - ipuRegs.ctrl.SCD = 1; - } - else - { - ipuRegs.ctrl.ECD = 1; - } - break; - } - DUMPBITS(8); - } while (1); - } - } - [[fallthrough]]; - - case 5: - if (!getBits32((u8*)&ipuRegs.top, 0)) - { - ipu_cmd.pos[0] = 5; - return false; - } - - ipuRegs.top = BigEndian(ipuRegs.top); - break; - } - - return true; -} - -MULTI_ISA_UNSHARED_END diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h deleted file mode 100644 index d5ea1132bd..0000000000 --- a/pcsx2/IPU/mpeg2lib/Mpeg.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Mpeg.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. 
- * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -#include "IPU/IPU.h" - -#include "GS/MultiISA.h" - -#include "common/Assertions.h" - -// the IPU is fixed to 16 byte strides (128-bit / QWC resolution): -static const uint decoder_stride = 16; - -enum macroblock_modes -{ - MACROBLOCK_INTRA = 1, - MACROBLOCK_PATTERN = 2, - MACROBLOCK_MOTION_BACKWARD = 4, - MACROBLOCK_MOTION_FORWARD = 8, - MACROBLOCK_QUANT = 16, - DCT_TYPE_INTERLACED = 32 -}; - -enum motion_type -{ - MOTION_TYPE_SHIFT = 6, - MOTION_TYPE_MASK = (3*64), - MOTION_TYPE_BASE = 64, - MC_FIELD = (1*64), - MC_FRAME = (2*64), - MC_16X8 = (2*64), - MC_DMV = (3*64) -}; - -/* picture structure */ -enum picture_structure -{ - TOP_FIELD = 1, - BOTTOM_FIELD = 2, - FRAME_PICTURE = 3 -}; - -/* picture coding type */ -enum picture_coding_type -{ - I_TYPE = 1, - P_TYPE = 2, - B_TYPE = 3, - D_TYPE = 4 -}; - -struct macroblock_8{ - u8 Y[16][16]; //0 - u8 Cb[8][8]; //1 - u8 Cr[8][8]; //2 -}; - -struct macroblock_16{ - s16 Y[16][16]; //0 - s16 Cb[8][8]; //1 - s16 Cr[8][8]; //2 -}; - -struct macroblock_rgb32{ - struct { - u8 r, g, b, a; - } c[16][16]; -}; - -struct rgb16_t{ - u16 r:5, g:5, b:5, a:1; -}; - -struct macroblock_rgb16{ - rgb16_t c[16][16]; -}; - -struct decoder_t { - /* first, state that carries information from one macroblock 
to the */ - /* next inside a slice, and is never used outside of mpeg2_slice() */ - - /* DCT coefficients - should be kept aligned ! */ - s16 DCTblock[64]; - - u8 niq[64]; //non-intraquant matrix (sequence header) - u8 iq[64]; //intraquant matrix (sequence header) - - macroblock_8 mb8; - macroblock_16 mb16; - macroblock_rgb32 rgb32; - macroblock_rgb16 rgb16; - - uint ipu0_data; // amount of data in the output macroblock (in QWC) - uint ipu0_idx; - - int quantizer_scale; - - /* now non-slice-specific information */ - - /* picture header stuff */ - - /* what type of picture this is (I, P, B, D) */ - int coding_type; - - /* picture coding extension stuff */ - - /* predictor for DC coefficients in intra blocks */ - s16 dc_dct_pred[3]; - - /* quantization factor for intra dc coefficients */ - int intra_dc_precision; - /* top/bottom/both fields */ - int picture_structure; - /* bool to indicate all predictions are frame based */ - int frame_pred_frame_dct; - /* bool to indicate whether intra blocks have motion vectors */ - /* (for concealment) */ - int concealment_motion_vectors; - /* bit to indicate which quantization table to use */ - int q_scale_type; - /* bool to use different vlc tables */ - int intra_vlc_format; - /* used for DMV MC */ - int top_field_first; - // Pseudo Sign Offset - int sgn; - // Dither Enable - int dte; - // Output Format - int ofm; - // Macroblock type - int macroblock_modes; - // DC Reset - int dcr; - // Coded block pattern - int coded_block_pattern; - - /* stuff derived from bitstream */ - - /* the zigzag scan we're supposed to be using, true for alt, false for normal */ - bool scantype; - - int mpeg1; - - template< typename T > - void SetOutputTo( T& obj ) - { - uint mb_offset = ((uptr)&obj - (uptr)&mb8); - pxAssume( (mb_offset & 15) == 0 ); - ipu0_idx = mb_offset / 16; - ipu0_data = sizeof(obj)/16; - } - - u128* GetIpuDataPtr() - { - return ((u128*)&mb8) + ipu0_idx; - } - - void AdvanceIpuDataBy(uint amt) - { - pxAssertMsg(ipu0_data>=amt, 
"IPU FIFO Overflow on advance!" ); - ipu0_idx += amt; - ipu0_data -= amt; - } -}; - -struct mpeg2_scan_pack -{ - u8 norm[64]; - u8 alt[64]; -}; - -extern u32 UBITS(uint bits); -extern s32 SBITS(uint bits); - -MULTI_ISA_DEF( - extern int bitstream_init(); - - extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride); - extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride); - - extern bool mpeg2sliceIDEC(); - extern bool mpeg2_slice(); - extern int get_macroblock_address_increment(); - extern int get_macroblock_modes(); - - extern int get_motion_delta(const int f_code); - extern int get_dmv(); - - extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn); - extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte); - extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4); - - extern int slice (u8 * buffer); -) - -#ifdef _MSC_VER -#define BigEndian(in) _byteswap_ulong(in) -#else -#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap... -#endif - -#ifdef _MSC_VER -#define BigEndian64(in) _byteswap_uint64(in) -#else -#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap... -#endif - -alignas(16) extern const mpeg2_scan_pack mpeg2_scan; -extern const int non_linear_quantizer_scale[]; - -// The IPU can only do one task at once and never uses other buffers so all mpeg state variables -// are made available to mpeg/vlc modules as globals here: - -alignas(16) extern tIPU_BP g_BP; -alignas(16) extern decoder_t decoder; - diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h deleted file mode 100644 index 61bcd791b5..0000000000 --- a/pcsx2/IPU/mpeg2lib/Vlc.h +++ /dev/null @@ -1,663 +0,0 @@ -/* - * vlc.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
- * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// WARNING! This file should only be included into Mpeg.cpp AND NOWHERE ELSE. -// All contents of this file are used only by Mpeg.cpp, and including it elsewhere will -// just result in the linker having to remove a whole lot of redundant/unused decoder -// tables and static functions. -- air - -#pragma once - -static __fi int GETWORD() -{ - return g_BP.FillBuffer(16); -} - -// Removes bits from the bitstream. This is done independently of UBITS/SBITS because a -// lot of mpeg streams have to read ahead and rewind bits and re-read them at different -// bit depths or sign'age. 
-static __fi void DUMPBITS(uint num) -{ - g_BP.Advance(num); - //pxAssume(g_BP.FP != 0); -} - -static __fi u32 GETBITS(uint num) -{ - uint retVal = UBITS(num); - g_BP.Advance(num); - - return retVal; -} - -struct MBtab { - u8 modes; - u8 len; -}; - -struct MVtab { - u8 delta; - u8 len; -}; - -struct DMVtab { - s8 dmv; - u8 len; -}; - -struct CBPtab { - u8 cbp; - u8 len; -}; - -struct DCtab { - u8 size; - u8 len; -}; - -struct DCTtab { - u8 run; - u8 level; - u8 len; -}; - -struct MBAtab { - u8 mba; - u8 len; -}; - - -#define INTRA MACROBLOCK_INTRA -#define QUANT MACROBLOCK_QUANT - -static const MBtab MB_I [] = { - {INTRA|QUANT, 2}, {INTRA, 1} -}; - -#define MC MACROBLOCK_MOTION_FORWARD -#define CODED MACROBLOCK_PATTERN - -alignas(16) static const MBtab MB_P [] = { - {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, - {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} -}; - -#define FWD MACROBLOCK_MOTION_FORWARD -#define BWD MACROBLOCK_MOTION_BACKWARD -#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD - -alignas(16) static const MBtab MB_B [] = { - {0, 0}, {INTRA|QUANT, 6}, - {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, - {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, - {INTRA, 5}, {INTRA, 5}, - {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, - {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 
2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} -}; - -#undef INTRA -#undef QUANT -#undef MC -#undef CODED -#undef FWD -#undef BWD -#undef INTER - - -static const MVtab MV_4 [] = { - { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} -}; - -alignas(16) static const MVtab MV_10 [] = { - { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, - { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, - {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, - { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, - { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, - { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} -}; - - -static const DMVtab DMV_2 [] = { - { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} -}; - - -alignas(16) static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, - {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 
4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} -}; - -alignas(16) static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} -}; - -#if 0 // following tables are unused by PCSX2 - -static const DCtab DC_lum_5 [] = { - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} -}; - -static const DCtab DC_chrom_5 [] = { - {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} -}; - -static const DCtab DC_long [] = { - {6, 5}, 
{6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} -}; - -static const DCTtab DCT_16 [] = { - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, - { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, - { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, - { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} -}; - -static const DCTtab DCT_15 [] = { - { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, - { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, - { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, - { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, - { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, - { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, - { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, - { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, - { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, - { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, - { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, - { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} -}; - -static const DCTtab DCT_13 [] = { - { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, - { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, - { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, - { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, - { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, - { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, - { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, - { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, - { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, - { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, - { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, - { 7, 2,12}, { 7, 2,12}, 
{ 18, 1,12}, { 18, 1,12} -}; - -static const DCTtab DCT_B14_10 [] = { - { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, - { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} -}; - -static const DCTtab DCT_B14_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, - { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, - { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, - { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, - { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} -}; - -static const DCTtab DCT_B14AC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} -}; - -static const DCTtab DCT_B14DC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} -}; - -static const DCTtab DCT_B15_10 [] = { - { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, - { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} -}; - -static const DCTtab DCT_B15_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, - { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, - { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, - { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 
2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, - { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 
2}, { 1, 1, 2}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, - { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, - { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, - { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} -}; -#endif - -struct MBAtabSet -{ - MBAtab mba5[30]; - MBAtab mba11[26*4]; -}; -alignas(16) static const MBAtabSet MBA = { - { // mba5 - {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, - {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} - }, - - { // mba11 - {32, 11}, {31, 11}, {30, 11}, {29, 11}, - {28, 11}, {27, 11}, {26, 11}, {25, 11}, - {24, 11}, {23, 11}, {22, 11}, {21, 11}, - {20, 10}, {20, 10}, {19, 10}, {19, 10}, - {18, 10}, {18, 10}, {17, 10}, {17, 10}, - {16, 10}, {16, 10}, {15, 10}, {15, 10}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 
8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} - } -}; - -// New - -#if 0 // Not used by PCSX2 -/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */ -static MBAtab MBAtab1[16] = -{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4}, - {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3} -}; - -/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx */ -static MBAtab MBAtab2[104] = -{ - {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11}, - {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10}, - {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10}, - {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, - {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, - {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, - {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, - {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, - {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, - {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, - {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, - {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, - {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7} -}; -#endif - -struct DCtabSet -{ - DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 - DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 - DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 - DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 -}; - -alignas(16) static const DCtabSet DCtable = -{ - // lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 
11110 */ - { {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} }, - - /* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */ - { {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} }, - - /* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */ - { {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} }, - - /* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */ - { {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, - {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, - {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, - {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} }, -}; - -struct DCTtabSet -{ - DCTtab first[12]; - DCTtab next[12]; - - DCTtab tab0[60]; - DCTtab tab0a[252]; - DCTtab tab1[8]; - DCTtab tab1a[8]; - - DCTtab tab2[16]; - DCTtab tab3[16]; - DCTtab tab4[16]; - DCTtab tab5[16]; - DCTtab tab6[16]; -}; - -alignas(16) static const DCTtabSet DCT = -{ - /* first[12]: Table B-14, DCT coefficients table zero, - * codes 0100 ... 1xxx (used for first (DC) coefficient) - */ - { {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3}, - {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, - {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} }, - - /* next[12]: Table B-14, DCT coefficients table zero, - * codes 0100 ... 
1xxx (used for all other coefficients) - */ - { {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3}, - {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */ - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2} }, - - /* tab0[60]: Table B-14, DCT coefficients table zero, - * codes 000001xx ... 00111xxx - */ - { {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */ - {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7}, - {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7}, - {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, - {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6}, - {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6}, - {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6}, - {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8}, - {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8}, - {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5}, - {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5}, - {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, - {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} }, - - /* tab0a[252]: Table B-15, DCT coefficients table one, - * codes 000001xx ... 11111111 - */ - { {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */ - {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7}, - {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7}, - {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, - {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6}, - {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6}, - {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6}, - {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8}, - {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8}, - {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5}, - {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5}, - {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5}, - {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */ - {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, - {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, - 
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5}, - {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5}, - {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5}, - {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5}, - {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7}, - {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7}, - {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8}, - {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} }, - - /* Table B-14, DCT coefficients table zero, - * codes 0000001000 ... 0000001111 - */ - { {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10}, - {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10} }, - - /* Table B-15, DCT coefficients table one, - * codes 000000100x ... 000000111x - */ - { {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9}, - {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 000000010000 ... 
000000011111 - */ - { {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12}, - {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12}, - {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12}, - {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 0000000010000 ... 0000000011111 - */ - { {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13}, - {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13}, - {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13}, - {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 00000000010000 ... 00000000011111 - */ - { {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14}, - {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14}, - {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14}, - {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 000000000010000 ... 000000000011111 - */ - { {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15}, - {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15}, - {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15}, - {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 0000000000010000 ... 
0000000000011111 - */ - { {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16}, - {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16}, - {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16}, - {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} } - -}; diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 51db3182e5..c0ef19b3d2 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -20,9 +20,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" -#include "yuv2rgb.h" -#include "mpeg2lib/Mpeg.h" +#include "IPU/IPU.h" +#include "IPU/IPU_MultiISA.h" +#include "IPU/yuv2rgb.h" // The IPU's colour space conversion conforms to ITU-R Recommendation BT.601 if anyone wants to make a // faster or "more accurate" implementation, but this is the precise documented integer method used by diff --git a/pcsx2/pcsx2core.vcxproj b/pcsx2/pcsx2core.vcxproj index 46018909b2..ed26318a7d 100644 --- a/pcsx2/pcsx2core.vcxproj +++ b/pcsx2/pcsx2core.vcxproj @@ -450,8 +450,6 @@ - - @@ -579,6 +577,7 @@ + @@ -782,8 +781,6 @@ - - diff --git a/pcsx2/pcsx2core.vcxproj.filters b/pcsx2/pcsx2core.vcxproj.filters index a29bd0c053..4bea3e0524 100644 --- a/pcsx2/pcsx2core.vcxproj.filters +++ b/pcsx2/pcsx2core.vcxproj.filters @@ -94,9 +94,6 @@ {4dab2d06-69e0-4f3e-b6d3-45e5e85af940} - - {67e51016-d1db-44d2-910d-349d2833f798} - {5602cc18-9d1c-49c8-9509-7e4cf9ecd91b} @@ -653,12 +650,6 @@ System\Ps2\IPU - - System\Ps2\IPU\mpeg2lib - - - System\Ps2\IPU\mpeg2lib - System\Ps2\GS\GIF @@ -1619,12 +1610,6 @@ System\Ps2\IPU - - System\Ps2\IPU\mpeg2lib - - - System\Ps2\IPU\mpeg2lib - System\Ps2\Debug @@ -2345,6 +2330,9 @@ System\Ps2\GS\Renderers\Hardware + + System\Ps2\IPU +