From d6c1af1a0cd3634544830db1e558fefdc1d7dac3 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 16 Jan 2023 22:19:58 +1000 Subject: [PATCH] IPU: Combine source files to work around an LTO bug in clang 15 Works around an LTO bug which seems to occur in clang 15, but not in clang 12. The entire else branch in the 0 case in get_non_intra_block() was being removed... Fixes Burnout 3 crashing in clang builds. --- pcsx2/CMakeLists.txt | 5 +- pcsx2/IPU/IPU.cpp | 164 +--- pcsx2/IPU/IPU.h | 6 - pcsx2/IPU/IPU_Fifo.cpp | 4 +- pcsx2/IPU/IPU_MultiISA.cpp | 1610 ++++++++++++++++++++++++++++++- pcsx2/IPU/IPU_MultiISA.h | 154 ++- pcsx2/IPU/IPUdither.cpp | 10 +- pcsx2/IPU/IPUdma.cpp | 4 +- pcsx2/IPU/mpeg2_vlc.h | 485 ++++++++++ pcsx2/IPU/mpeg2lib/Idct.cpp | 271 ------ pcsx2/IPU/mpeg2lib/Mpeg.cpp | 1285 ------------------------ pcsx2/IPU/mpeg2lib/Mpeg.h | 239 ----- pcsx2/IPU/mpeg2lib/Vlc.h | 663 ------------- pcsx2/IPU/yuv2rgb.cpp | 6 +- pcsx2/pcsx2core.vcxproj | 5 +- pcsx2/pcsx2core.vcxproj.filters | 18 +- 16 files changed, 2286 insertions(+), 2643 deletions(-) create mode 100644 pcsx2/IPU/mpeg2_vlc.h delete mode 100644 pcsx2/IPU/mpeg2lib/Idct.cpp delete mode 100644 pcsx2/IPU/mpeg2lib/Mpeg.cpp delete mode 100644 pcsx2/IPU/mpeg2lib/Mpeg.h delete mode 100644 pcsx2/IPU/mpeg2lib/Vlc.h diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 7d9ccc121a..140e036c50 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -687,8 +687,6 @@ set(pcsx2IPUSources set(pcsx2IPUSourcesUnshared IPU/IPU_MultiISA.cpp IPU/IPUdither.cpp - IPU/mpeg2lib/Idct.cpp - IPU/mpeg2lib/Mpeg.cpp IPU/yuv2rgb.cpp ) @@ -698,8 +696,7 @@ set(pcsx2IPUHeaders IPU/IPU_Fifo.h IPU/IPU_MultiISA.h IPU/IPUdma.h - IPU/mpeg2lib/Mpeg.h - IPU/mpeg2lib/Vlc.h + IPU/mpeg2_vlc.h IPU/yuv2rgb.h ) diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 3aa070fcd7..b0e366c74a 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -19,7 +19,6 @@ #include "IPU.h" #include "IPU_MultiISA.h" #include "IPUdma.h" -#include 
"mpeg2lib/Mpeg.h" #include #include "Config.h" @@ -46,10 +45,47 @@ int coded_block_pattern = 0; alignas(16) u8 g_ipu_indx4[16*16/2]; +alignas(16) const int non_linear_quantizer_scale[32] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + uint eecount_on_last_vdec = 0; bool FMVstarted = false; bool EnableFMV = false; +// Also defined in IPU_MultiISA.cpp, but IPU.cpp is not unshared. +// whenever reading fractions of bytes. The low bits always come from the next byte +// while the high bits come from the current byte +__ri static u8 getBits32(u8* address, bool advance) +{ + if (!g_BP.FillBuffer(32)) + return 0; + + const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP / 8]; + + if (uint shift = (g_BP.BP & 7)) + { + u32 mask = (0xff >> shift); + mask = mask | (mask << 8) | (mask << 16) | (mask << 24); + + *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); + } + else + { + // Bit position-aligned -- no masking/shifting necessary + *(u32*)address = *(u32*)readpos; + } + + if (advance) + g_BP.Advance(32); + + return 1; +} + void tIPU_cmd::clear() { memzero_sse_a(*this); @@ -89,7 +125,6 @@ void ReportIPU() Console.WriteLn("thresh = 0x%x.", g_ipu_thresh); Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern); Console.WriteLn("g_decoder = 0x%x.", &decoder); - Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan); Console.WriteLn(ipu_cmd.desc()); Console.Newline(); } @@ -346,7 +381,6 @@ __fi bool ipuWrite64(u32 mem, u64 value) return true; } - ////////////////////////////////////////////////////// // IPU Commands (exec on worker thread only) @@ -416,130 +450,6 @@ static void ipuSETTH(u32 val) IPU_LOG("SETTH (Set threshold value)command %x.", val&0x1ff01ff); } -// -------------------------------------------------------------------------------------- -// Buffer reader -// 
-------------------------------------------------------------------------------------- - -__ri u32 UBITS(uint bits) -{ - uint readpos8 = g_BP.BP/8; - - uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 )); - uint bp7 = (g_BP.BP & 7); - result <<= bp7; - result >>= (32 - bits); - - return result; -} - -__ri s32 SBITS(uint bits) -{ - // Read an unaligned 32 bit value and then shift the bits up and then back down. - - uint readpos8 = g_BP.BP/8; - - int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 )); - uint bp7 = (g_BP.BP & 7); - result <<= bp7; - result >>= (32 - bits); - - return result; -} - -// whenever reading fractions of bytes. The low bits always come from the next byte -// while the high bits come from the current byte -u8 getBits64(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(64)) return 0; - - const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; - - if (uint shift = (g_BP.BP & 7)) - { - u64 mask = (0xff >> shift); - mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); - - *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift); - } - else - { - *(u64*)address = *(u64*)readpos; - } - - if (advance) g_BP.Advance(64); - - return 1; -} - -// whenever reading fractions of bytes. 
The low bits always come from the next byte -// while the high bits come from the current byte -__fi u8 getBits32(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(32)) return 0; - - const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8]; - - if(uint shift = (g_BP.BP & 7)) - { - u32 mask = (0xff >> shift); - mask = mask | (mask << 8) | (mask << 16) | (mask << 24); - - *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); - } - else - { - // Bit position-aligned -- no masking/shifting necessary - *(u32*)address = *(u32*)readpos; - } - - if (advance) g_BP.Advance(32); - - return 1; -} - -__fi u8 getBits16(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(16)) return 0; - - const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; - - if (uint shift = (g_BP.BP & 7)) - { - uint mask = (0xff >> shift); - mask = mask | (mask << 8); - *(u16*)address = ((~mask & *(u16*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u16*)readpos) << shift); - } - else - { - *(u16*)address = *(u16*)readpos; - } - - if (advance) g_BP.Advance(16); - - return 1; -} - -u8 getBits8(u8 *address, bool advance) -{ - if (!g_BP.FillBuffer(8)) return 0; - - const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; - - if (uint shift = (g_BP.BP & 7)) - { - uint mask = (0xff >> shift); - *(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift); - } - else - { - *(u8*)address = *(u8*)readpos; - } - - if (advance) g_BP.Advance(8); - - return 1; -} - // -------------------------------------------------------------------------------------- // IPU Worker / Dispatcher // -------------------------------------------------------------------------------------- diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h index fcfd2ebe78..4dc8cc03ea 100644 --- a/pcsx2/IPU/IPU.h +++ b/pcsx2/IPU/IPU.h @@ -293,7 +293,6 @@ extern bool EnableFMV; alignas(16) extern tIPU_cmd ipu_cmd; extern uint eecount_on_last_vdec; -extern int 
coded_block_pattern; extern bool CommandExecuteQueued; extern void ipuReset(); @@ -307,8 +306,3 @@ extern void IPUCMD_WRITE(u32 val); extern void ipuSoftReset(); extern void IPUProcessInterrupt(); -extern u8 getBits64(u8 *address, bool advance); -extern u8 getBits32(u8 *address, bool advance); -extern u8 getBits16(u8 *address, bool advance); -extern u8 getBits8(u8 *address, bool advance); - diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp index 9765b97b96..3df82a3a27 100644 --- a/pcsx2/IPU/IPU_Fifo.cpp +++ b/pcsx2/IPU/IPU_Fifo.cpp @@ -15,9 +15,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" +#include "IPU/IPU.h" #include "IPU/IPUdma.h" -#include "mpeg2lib/Mpeg.h" +#include "IPU/IPU_MultiISA.h" alignas(16) IPU_Fifo ipu_fifo; diff --git a/pcsx2/IPU/IPU_MultiISA.cpp b/pcsx2/IPU/IPU_MultiISA.cpp index 7c94d975e2..1c8d323649 100644 --- a/pcsx2/IPU/IPU_MultiISA.cpp +++ b/pcsx2/IPU/IPU_MultiISA.cpp @@ -11,20 +11,1575 @@ * * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . + * + * Some of the functions in this file are based on the mpeg2dec library, + * + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * Modified by Florin for PCSX2 emu + * + * under the GPL license. However, they have been heavily rewritten for PCSX2 usage. + * The original author's copyright statement is included above for completeness sake. */ -#include "IPU_MultiISA.h" +#include "IPU/IPU.h" +#include "IPU/IPUdma.h" +#include "IPU/yuv2rgb.h" +#include "IPU/IPU_MultiISA.h" +#include "common/MemsetFast.inl" -#include "IPU.h" -#include "IPUdma.h" -#include "yuv2rgb.h" +// the IPU is fixed to 16 byte strides (128-bit / QWC resolution): +static const uint decoder_stride = 16; + +#if MULTI_ISA_COMPILE_ONCE + +static constexpr std::array make_clip_lut() +{ + std::array lut = {}; + for (int i = -384; i < 640; i++) + lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 
255 : i); + return lut; +} + +static constexpr mpeg2_scan_pack make_scan_pack() +{ + constexpr u8 mpeg2_scan_norm[64] = { + /* Zig-Zag scan pattern */ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 + }; + + constexpr u8 mpeg2_scan_alt[64] = { + /* Alternate scan pattern */ + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 + }; + + mpeg2_scan_pack pack = {}; + + for (int i = 0; i < 64; i++) { + int j = mpeg2_scan_norm[i]; + pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + j = mpeg2_scan_alt[i]; + pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } + + return pack; +} + +alignas(16) const std::array g_idct_clip_lut = make_clip_lut(); +alignas(16) const mpeg2_scan_pack mpeg2_scan = make_scan_pack(); + +#endif MULTI_ISA_UNSHARED_START +static void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn); +static void ipu_vq(macroblock_rgb16& rgb16, u8* indx4); + +// -------------------------------------------------------------------------------------- +// Buffer reader +// -------------------------------------------------------------------------------------- + +__ri static u32 UBITS(uint bits) +{ + uint readpos8 = g_BP.BP/8; + + uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 )); + uint bp7 = (g_BP.BP & 7); + result <<= bp7; + result >>= (32 - bits); + + return result; +} + +__ri static s32 SBITS(uint bits) +{ + // Read an unaligned 32 bit value and then shift the bits up and then back down. 
+ + uint readpos8 = g_BP.BP/8; + + int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 )); + uint bp7 = (g_BP.BP & 7); + result <<= bp7; + result >>= (32 - bits); + + return result; +} + +__fi static int GETWORD() +{ + return g_BP.FillBuffer(16); +} + +// Removes bits from the bitstream. This is done independently of UBITS/SBITS because a +// lot of mpeg streams have to read ahead and rewind bits and re-read them at different +// bit depths or sign'age. +__fi static void DUMPBITS(uint num) +{ + g_BP.Advance(num); + //pxAssume(g_BP.FP != 0); +} + +__fi static u32 GETBITS(uint num) +{ + uint retVal = UBITS(num); + g_BP.Advance(num); + + return retVal; +} + +// whenever reading fractions of bytes. The low bits always come from the next byte +// while the high bits come from the current byte +__ri static u8 getBits64(u8 *address, bool advance) +{ + if (!g_BP.FillBuffer(64)) return 0; + + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; + + if (uint shift = (g_BP.BP & 7)) + { + u64 mask = (0xff >> shift); + mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); + + *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift); + } + else + { + *(u64*)address = *(u64*)readpos; + } + + if (advance) g_BP.Advance(64); + + return 1; +} + +// whenever reading fractions of bytes. 
The low bits always come from the next byte +// while the high bits come from the current byte +__ri static u8 getBits32(u8 *address, bool advance) +{ + if (!g_BP.FillBuffer(32)) return 0; + + const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8]; + + if(uint shift = (g_BP.BP & 7)) + { + u32 mask = (0xff >> shift); + mask = mask | (mask << 8) | (mask << 16) | (mask << 24); + + *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift); + } + else + { + // Bit position-aligned -- no masking/shifting necessary + *(u32*)address = *(u32*)readpos; + } + + if (advance) g_BP.Advance(32); + + return 1; +} + +__ri static u8 getBits8(u8 *address, bool advance) +{ + if (!g_BP.FillBuffer(8)) return 0; + + const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8]; + + if (uint shift = (g_BP.BP & 7)) + { + uint mask = (0xff >> shift); + *(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift); + } + else + { + *(u8*)address = *(u8*)readpos; + } + + if (advance) g_BP.Advance(8); + + return 1; +} + + +#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ +#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ +#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ +#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ +#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ +#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ + +/* + * In legal streams, the IDCT output should be between -384 and +384. + * In corrupted streams, it is possible to force the IDCT output to go + * to +-3826 - this is the worst case for a column IDCT where the + * column inputs are 16-bit values. 
+ */ + +__fi static void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1) +{ + int tmp = w0 * (d0 + d1); + t0 = tmp + (w1 - w0) * d1; + t1 = tmp - (w1 + w0) * d0; +} + +__ri static void IDCT_Block(s16* block) +{ + for (int i = 0; i < 8; i++) + { + s16* const rblock = block + 8 * i; + if (!(rblock[1] | ((s32*)rblock)[1] | ((s32*)rblock)[2] | + ((s32*)rblock)[3])) + { + u32 tmp = (u16)(rblock[0] << 3); + tmp |= tmp << 16; + ((s32*)rblock)[0] = tmp; + ((s32*)rblock)[1] = tmp; + ((s32*)rblock)[2] = tmp; + ((s32*)rblock)[3] = tmp; + continue; + } + + int a0, a1, a2, a3; + { + const int d0 = (rblock[0] << 11) + 128; + const int d1 = rblock[1]; + const int d2 = rblock[2] << 11; + const int d3 = rblock[3]; + int t0 = d0 + d2; + int t1 = d0 - d2; + int t2, t3; + BUTTERFLY(t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + } + + int b0, b1, b2, b3; + { + const int d0 = rblock[4]; + const int d1 = rblock[5]; + const int d2 = rblock[6]; + const int d3 = rblock[7]; + int t0, t1, t2, t3; + BUTTERFLY(t0, t1, W7, W1, d3, d0); + BUTTERFLY(t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) * 181) >> 8; + b2 = ((t0 - t1) * 181) >> 8; + } + + rblock[0] = (a0 + b0) >> 8; + rblock[1] = (a1 + b1) >> 8; + rblock[2] = (a2 + b2) >> 8; + rblock[3] = (a3 + b3) >> 8; + rblock[4] = (a3 - b3) >> 8; + rblock[5] = (a2 - b2) >> 8; + rblock[6] = (a1 - b1) >> 8; + rblock[7] = (a0 - b0) >> 8; + } + + for (int i = 0; i < 8; i++) + { + s16* const cblock = block + i; + + int a0, a1, a2, a3; + { + const int d0 = (cblock[8 * 0] << 11) + 65536; + const int d1 = cblock[8 * 1]; + const int d2 = cblock[8 * 2] << 11; + const int d3 = cblock[8 * 3]; + const int t0 = d0 + d2; + const int t1 = d0 - d2; + int t2; + int t3; + BUTTERFLY(t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + } + + int b0, b1, b2, b3; + { + const int d0 = cblock[8 * 4]; + const int d1 = cblock[8 * 5]; + 
const int d2 = cblock[8 * 6]; + const int d3 = cblock[8 * 7]; + int t0, t1, t2, t3; + BUTTERFLY(t0, t1, W7, W1, d3, d0); + BUTTERFLY(t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 = (t0 - t2) >> 8; + t1 = (t1 - t3) >> 8; + b1 = (t0 + t1) * 181; + b2 = (t0 - t1) * 181; + } + + cblock[8 * 0] = (a0 + b0) >> 17; + cblock[8 * 1] = (a1 + b1) >> 17; + cblock[8 * 2] = (a2 + b2) >> 17; + cblock[8 * 3] = (a3 + b3) >> 17; + cblock[8 * 4] = (a3 - b3) >> 17; + cblock[8 * 5] = (a2 - b2) >> 17; + cblock[8 * 6] = (a1 - b1) >> 17; + cblock[8 * 7] = (a0 - b0) >> 17; + } +} + +__ri static void IDCT_Copy(s16* block, u8* dest, const int stride) +{ + IDCT_Block(block); + + for (int i = 0; i < 8; i++) + { + dest[0] = (g_idct_clip_lut.data() + 384)[block[0]]; + dest[1] = (g_idct_clip_lut.data() + 384)[block[1]]; + dest[2] = (g_idct_clip_lut.data() + 384)[block[2]]; + dest[3] = (g_idct_clip_lut.data() + 384)[block[3]]; + dest[4] = (g_idct_clip_lut.data() + 384)[block[4]]; + dest[5] = (g_idct_clip_lut.data() + 384)[block[5]]; + dest[6] = (g_idct_clip_lut.data() + 384)[block[6]]; + dest[7] = (g_idct_clip_lut.data() + 384)[block[7]]; + + std::memset(block, 0, 16); + + dest += stride; + block += 8; + } +} + + +// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]). +__ri static void IDCT_Add(const int last, s16* block, s16* dest, const int stride) +{ + // on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0). 
+ + if (last != 129 || (block[0] & 7) == 4) + { + IDCT_Block(block); + + __m128 zero = _mm_setzero_ps(); + for (int i = 0; i < 8; i++) + { + _mm_store_ps((float*)dest, _mm_load_ps((float*)block)); + _mm_store_ps((float*)block, zero); + + dest += stride; + block += 8; + } + } + else + { + s16 DC = ((int)block[0] + 4) >> 3; + s16 dcf[2] = {DC, DC}; + block[0] = block[63] = 0; + + __m128 dc128 = _mm_set_ps1(*(float*)dcf); + + for (int i = 0; i < 8; ++i) + _mm_store_ps((float*)(dest + (stride * i)), dc128); + } +} + +/* Bitstream and buffer needs to be reallocated in order for successful + reading of the old data. Here the old data stored in the 2nd slot + of the internal buffer is copied to 1st slot, and the new data read + into 1st slot is copied to the 2nd slot. Which will later be copied + back to the 1st slot when 128bits have been read. +*/ +static const DCTtab * tab; +static int mbaCount = 0; + +__ri static int BitstreamInit () +{ + return g_BP.FillBuffer(32); +} + +static int GetMacroblockModes() +{ + int macroblock_modes; + const MBtab * tab; + + switch (decoder.coding_type) + { + case I_TYPE: + macroblock_modes = UBITS(2); + + if (macroblock_modes == 0) return 0; // error + + tab = MB_I + (macroblock_modes >> 1); + DUMPBITS(tab->len); + macroblock_modes = tab->modes; + + if ((!(decoder.frame_pred_frame_dct)) && + (decoder.picture_structure == FRAME_PICTURE)) + { + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; + } + return macroblock_modes; + + case P_TYPE: + macroblock_modes = UBITS(6); + + if (macroblock_modes == 0) return 0; // error + + tab = MB_P + (macroblock_modes >> 1); + DUMPBITS(tab->len); + macroblock_modes = tab->modes; + + if (decoder.picture_structure != FRAME_PICTURE) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + { + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + } + + return macroblock_modes; + } + else if (decoder.frame_pred_frame_dct) + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= 
MC_FRAME; + + return macroblock_modes; + } + else + { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + { + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + } + + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; + } + + return macroblock_modes; + } + + case B_TYPE: + macroblock_modes = UBITS(6); + + if (macroblock_modes == 0) return 0; // error + + tab = MB_B + macroblock_modes; + DUMPBITS(tab->len); + macroblock_modes = tab->modes; + + if (decoder.picture_structure != FRAME_PICTURE) + { + if (!(macroblock_modes & MACROBLOCK_INTRA)) + { + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + } + return (macroblock_modes | (tab->len << 16)); + } + else if (decoder.frame_pred_frame_dct) + { + /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return (macroblock_modes | (tab->len << 16)); + } + else + { + if (macroblock_modes & MACROBLOCK_INTRA) goto intra; + + macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; + + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) + { +intra: + macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; + } + return (macroblock_modes | (tab->len << 16)); + } + + case D_TYPE: + macroblock_modes = GETBITS(1); + //I suspect (as this is actually a 2 bit command) that this should be getbits(2) + //additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction) + DevCon.Warning(" Rare MPEG command! 
"); + if (macroblock_modes == 0) return 0; // error + return (MACROBLOCK_INTRA | (1 << 16)); + + default: + return 0; + } +} + +__ri static int get_macroblock_address_increment() +{ + const MBAtab *mba; + + u16 code = UBITS(16); + + if (code >= 4096) + mba = MBA.mba5 + (UBITS(5) - 2); + else if (code >= 768) + mba = MBA.mba11 + (UBITS(11) - 24); + else switch (UBITS(11)) + { + case 8: /* macroblock_escape */ + DUMPBITS(11); + return 0xb0023; + + case 15: /* macroblock_stuffing (MPEG1 only) */ + if (decoder.mpeg1) + { + DUMPBITS(11); + return 0xb0022; + } + [[fallthrough]]; + + default: + return 0;//error + } + + DUMPBITS(mba->len); + + return ((mba->mba + 1) | (mba->len << 16)); +} + +__fi static int get_luma_dc_dct_diff() +{ + int size; + int dc_diff; + u16 code = UBITS(5); + + if (code < 31) + { + size = DCtable.lum0[code].size; + DUMPBITS(DCtable.lum0[code].len); + + // 5 bits max + } + else + { + code = UBITS(9) - 0x1f0; + size = DCtable.lum1[code].size; + DUMPBITS(DCtable.lum1[code].len); + + // 9 bits max + } + + if (size==0) + dc_diff = 0; + else + { + dc_diff = GETBITS(size); + + // 6 for tab0 and 11 for tab1 + if ((dc_diff & (1<<(size-1)))==0) + dc_diff-= (1< 4095) + val = (val >> 31) ^ 2047; +} + +__ri static bool get_intra_block() +{ + const u8 * scan = decoder.scantype ? 
mpeg2_scan.alt : mpeg2_scan.norm; + const u8 (&quant_matrix)[64] = decoder.iq; + int quantizer_scale = decoder.quantizer_scale; + s16 * dest = decoder.DCTblock; + u16 code; + + /* decode AC coefficients */ + for (int i=1 + ipu_cmd.pos[4]; ; i++) + { + switch (ipu_cmd.pos[5]) + { + case 0: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i - 1; + return false; + } + + code = UBITS(16); + + if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1)) + { + tab = &DCT.next[(code >> 12) - 4]; + } + else if (code >= 1024) + { + if (decoder.intra_vlc_format && !decoder.mpeg1) + { + tab = &DCT.tab0a[(code >> 8) - 4]; + } + else + { + tab = &DCT.tab0[(code >> 8) - 4]; + } + } + else if (code >= 512) + { + if (decoder.intra_vlc_format && !decoder.mpeg1) + { + tab = &DCT.tab1a[(code >> 6) - 8]; + } + else + { + tab = &DCT.tab1[(code >> 6) - 8]; + } + } + + // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup + // that should use a single unrolled DCT table instead of five separate tables used + // here. Multiple conditional statements are very slow, while modern CPU data caches + // have lots of room to spare. + + else if (code >= 256) + { + tab = &DCT.tab2[(code >> 4) - 16]; + } + else if (code >= 128) + { + tab = &DCT.tab3[(code >> 3) - 16]; + } + else if (code >= 64) + { + tab = &DCT.tab4[(code >> 2) - 16]; + } + else if (code >= 32) + { + tab = &DCT.tab5[(code >> 1) - 16]; + } + else if (code >= 16) + { + tab = &DCT.tab6[code - 16]; + } + else + { + ipu_cmd.pos[4] = 0; + return true; + } + + DUMPBITS(tab->len); + + if (tab->run==64) /* end_of_block */ + { + ipu_cmd.pos[4] = 0; + return true; + } + + i += (tab->run == 65) ? 
GETBITS(6) : tab->run; + if (i >= 64) + { + ipu_cmd.pos[4] = 0; + return true; + } + [[fallthrough]]; + + case 1: + { + if (!GETWORD()) + { + ipu_cmd.pos[4] = i - 1; + ipu_cmd.pos[5] = 1; + return false; + } + + uint j = scan[i]; + int val; + + if (tab->run==65) /* escape */ + { + if(!decoder.mpeg1) + { + val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); + + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = (val * quantizer_scale * quant_matrix[i]) >> 4; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + if(decoder.mpeg1) + { + /* oddification */ + val = (val - 1) | 1; + } + + /* if (bitstream_get (1)) val = -val; */ + int bit1 = SBITS(1); + val = (val ^ bit1) - bit1; + DUMPBITS(1); + } + + SATURATE(val); + dest[j] = val; + ipu_cmd.pos[5] = 0; + } + } + } + + ipu_cmd.pos[4] = 0; + return true; +} + +__ri static bool get_non_intra_block(int * last) +{ + int i; + int j; + int val; + const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm; + const u8 (&quant_matrix)[64] = decoder.niq; + int quantizer_scale = decoder.quantizer_scale; + s16 * dest = decoder.DCTblock; + u16 code; + + /* decode AC coefficients */ + for (i= ipu_cmd.pos[4] ; ; i++) + { + switch (ipu_cmd.pos[5]) + { + case 0: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i; + return false; + } + + code = UBITS(16); + + if (code >= 16384) + { + if (i==0) + { + tab = &DCT.first[(code >> 12) - 4]; + } + else + { + tab = &DCT.next[(code >> 12)- 4]; + } + } + else if (code >= 1024) + { + tab = &DCT.tab0[(code >> 8) - 4]; + } + else if (code >= 512) + { + tab = &DCT.tab1[(code >> 6) - 8]; + } + + // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup + // that should use a single unrolled DCT table instead of five separate tables used + // here. 
Multiple conditional statements are very slow, while modern CPU data caches + // have lots of room to spare. + + else if (code >= 256) + { + tab = &DCT.tab2[(code >> 4) - 16]; + } + else if (code >= 128) + { + tab = &DCT.tab3[(code >> 3) - 16]; + } + else if (code >= 64) + { + tab = &DCT.tab4[(code >> 2) - 16]; + } + else if (code >= 32) + { + tab = &DCT.tab5[(code >> 1) - 16]; + } + else if (code >= 16) + { + tab = &DCT.tab6[code - 16]; + } + else + { + ipu_cmd.pos[4] = 0; + return true; + } + + DUMPBITS(tab->len); + + if (tab->run==64) /* end_of_block */ + { + *last = i; + ipu_cmd.pos[4] = 0; + return true; + } + + i += (tab->run == 65) ? GETBITS(6) : tab->run; + if (i >= 64) + { + *last = i; + ipu_cmd.pos[4] = 0; + return true; + } + [[fallthrough]]; + + case 1: + if (!GETWORD()) + { + ipu_cmd.pos[4] = i; + ipu_cmd.pos[5] = 1; + return false; + } + + j = scan[i]; + + if (tab->run==65) /* escape */ + { + if (!decoder.mpeg1) + { + val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5; + DUMPBITS(12); + } + else + { + val = SBITS(8); + DUMPBITS(8); + + if (!(val & 0x7f)) + { + val = GETBITS(8) + 2 * val; + } + + val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32; + val = (val + ~ (((s32)val) >> 31)) | 1; + } + } + else + { + int bit1 = SBITS(1); + val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; + val = (val ^ bit1) - bit1; + DUMPBITS(1); + } + + SATURATE(val); + dest[j] = val; + ipu_cmd.pos[5] = 0; + } + } + + ipu_cmd.pos[4] = 0; + return true; +} + +__ri static bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) +{ + if (!skip || ipu_cmd.pos[3]) + { + ipu_cmd.pos[3] = 0; + if (!GETWORD()) + { + ipu_cmd.pos[3] = 1; + return false; + } + + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + decoder.dc_dct_pred[0] += get_luma_dc_dct_diff(); + else + decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff(); + + decoder.DCTblock[0] 
= decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision); + } + + if (!get_intra_block()) + { + return false; + } + + IDCT_Copy(decoder.DCTblock, dest, stride); + + return true; +} + +__ri static bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) +{ + int last; + + if (!skip) + { + memzero_sse_a(decoder.DCTblock); + } + + if (!get_non_intra_block(&last)) + { + return false; + } + + IDCT_Add(last, decoder.DCTblock, dest, stride); + + return true; +} + +__fi static void finishmpeg2sliceIDEC() +{ + ipuRegs.ctrl.SCD = 0; + coded_block_pattern = decoder.coded_block_pattern; +} + +__ri static bool mpeg2sliceIDEC() +{ + u16 code; + + switch (ipu_cmd.pos[0]) + { + case 0: + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + + ipuRegs.top = 0; + ipuRegs.ctrl.ECD = 0; + [[fallthrough]]; + + case 1: + ipu_cmd.pos[0] = 1; + if (!BitstreamInit()) + { + return false; + } + [[fallthrough]]; + + case 2: + ipu_cmd.pos[0] = 2; + while (1) + { + // IPU0 isn't ready for data, so let's wait for it to be + if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[1] <= 2) + { + return false; + } + macroblock_8& mb8 = decoder.mb8; + macroblock_rgb16& rgb16 = decoder.rgb16; + macroblock_rgb32& rgb32 = decoder.rgb32; + + int DCT_offset, DCT_stride; + const MBAtab * mba; + + switch (ipu_cmd.pos[1]) + { + case 0: + decoder.macroblock_modes = GetMacroblockModes(); + + if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC + { + const int quantizer_scale_code = GETBITS(5); + if (decoder.q_scale_type) + decoder.quantizer_scale = non_linear_quantizer_scale[quantizer_scale_code]; + else + decoder.quantizer_scale = quantizer_scale_code << 1; + } + + decoder.coded_block_pattern = 0x3F;//all 6 blocks + memzero_sse_a(mb8); + memzero_sse_a(rgb32); + [[fallthrough]]; + + case 1: + ipu_cmd.pos[1] = 1; + + if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) + { + DCT_offset = 
decoder_stride; + DCT_stride = decoder_stride * 2; + } + else + { + DCT_offset = decoder_stride * 8; + DCT_stride = decoder_stride; + } + + switch (ipu_cmd.pos[2]) + { + case 0: + case 1: + if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1)) + { + ipu_cmd.pos[2] = 1; + return false; + } + [[fallthrough]]; + + case 2: + if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2)) + { + ipu_cmd.pos[2] = 2; + return false; + } + [[fallthrough]]; + + case 3: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3)) + { + ipu_cmd.pos[2] = 3; + return false; + } + [[fallthrough]]; + + case 4: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4)) + { + ipu_cmd.pos[2] = 4; + return false; + } + [[fallthrough]]; + + case 5: + if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5)) + { + ipu_cmd.pos[2] = 5; + return false; + } + [[fallthrough]]; + + case 6: + if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6)) + { + ipu_cmd.pos[2] = 6; + return false; + } + break; + + jNO_DEFAULT; + } + + // Send The MacroBlock via DmaIpuFrom + ipu_csc(mb8, rgb32, decoder.sgn); + + if (decoder.ofm == 0) + decoder.SetOutputTo(rgb32); + else + { + ipu_dither(rgb32, rgb16, decoder.dte); + decoder.SetOutputTo(rgb16); + } + [[fallthrough]]; + + case 2: + { + + pxAssert(decoder.ipu0_data > 0); + + uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + decoder.AdvanceIpuDataBy(read); + + if (decoder.ipu0_data != 0) + { + // IPU FIFO filled up -- Will have to finish transferring later. 
+ ipu_cmd.pos[1] = 2; + return false; + } + + mbaCount = 0; + if (read) + { + ipu_cmd.pos[1] = 3; + return false; + } + } + [[fallthrough]]; + + case 3: + while (1) + { + if (!GETWORD()) + { + ipu_cmd.pos[1] = 3; + return false; + } + + code = UBITS(16); + if (code >= 0x1000) + { + mba = MBA.mba5 + (UBITS(5) - 2); + break; + } + else if (code >= 0x0300) + { + mba = MBA.mba11 + (UBITS(11) - 24); + break; + } + else switch (UBITS(11)) + { + case 8: /* macroblock_escape */ + mbaCount += 33; + [[fallthrough]]; + + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS(11); + continue; + + default: /* end of slice/frame, or error? */ + { + goto finish_idec; + } + } + } + + DUMPBITS(mba->len); + mbaCount += mba->mba; + + if (mbaCount) + { + decoder.dc_dct_pred[0] = + decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + } + [[fallthrough]]; + + case 4: + if (!GETWORD()) + { + ipu_cmd.pos[1] = 4; + return false; + } + break; + + jNO_DEFAULT; + } + + ipu_cmd.pos[1] = 0; + ipu_cmd.pos[2] = 0; + } + +finish_idec: + finishmpeg2sliceIDEC(); + [[fallthrough]]; + + case 3: + { + u8 bit8; + u32 start_check; + if (!getBits8((u8*)&bit8, 0)) + { + ipu_cmd.pos[0] = 3; + return false; + } + + if (bit8 == 0) + { + g_BP.Align(); + do + { + if (!g_BP.FillBuffer(24)) + { + ipu_cmd.pos[0] = 3; + return false; + } + start_check = UBITS(24); + if (start_check != 0) + { + if (start_check == 1) + { + ipuRegs.ctrl.SCD = 1; + } + else + { + ipuRegs.ctrl.ECD = 1; + } + break; + } + DUMPBITS(8); + } while (1); + } + } + [[fallthrough]]; + + case 4: + if (!getBits32((u8*)&ipuRegs.top, 0)) + { + ipu_cmd.pos[0] = 4; + return false; + } + + ipuRegs.top = BigEndian(ipuRegs.top); + break; + + jNO_DEFAULT; + } + + return true; +} + +__fi static bool mpeg2_slice() +{ + int DCT_offset, DCT_stride; + + macroblock_8& mb8 = decoder.mb8; + macroblock_16& mb16 = decoder.mb16; + + switch (ipu_cmd.pos[0]) + { + case 0: + if (decoder.dcr) + { + decoder.dc_dct_pred[0] = + 
decoder.dc_dct_pred[1] = + decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; + } + + ipuRegs.ctrl.ECD = 0; + ipuRegs.top = 0; + memzero_sse_a(mb8); + memzero_sse_a(mb16); + [[fallthrough]]; + + case 1: + if (!BitstreamInit()) + { + ipu_cmd.pos[0] = 1; + return false; + } + [[fallthrough]]; + + case 2: + ipu_cmd.pos[0] = 2; + + // IPU0 isn't ready for data, so let's wait for it to be + if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[0] <= 3) + { + return false; + } + + if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) + { + DCT_offset = decoder_stride; + DCT_stride = decoder_stride * 2; + } + else + { + DCT_offset = decoder_stride * 8; + DCT_stride = decoder_stride; + } + + if (decoder.macroblock_modes & MACROBLOCK_INTRA) + { + switch(ipu_cmd.pos[1]) + { + case 0: + decoder.coded_block_pattern = 0x3F; + [[fallthrough]]; + + case 1: + if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1)) + { + ipu_cmd.pos[1] = 1; + return false; + } + [[fallthrough]]; + + case 2: + if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) + { + ipu_cmd.pos[1] = 2; + return false; + } + [[fallthrough]]; + + case 3: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) + { + ipu_cmd.pos[1] = 3; + return false; + } + [[fallthrough]]; + + case 4: + if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) + { + ipu_cmd.pos[1] = 4; + return false; + } + [[fallthrough]]; + + case 5: + if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) + { + ipu_cmd.pos[1] = 5; + return false; + } + [[fallthrough]]; + + case 6: + if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6)) + { + ipu_cmd.pos[1] = 6; + return false; + } + break; + + jNO_DEFAULT; + } + + // Copy macroblock8 to macroblock16 - without sign extension. + // Manually inlined due to MSVC refusing to inline the SSE-optimized version. 
+ { + const u8 *s = (const u8*)&mb8; + u16 *d = (u16*)&mb16; + + //Y bias - 16 * 16 + //Cr bias - 8 * 8 + //Cb bias - 8 * 8 + + __m128i zeroreg = _mm_setzero_si128(); + + for (uint i = 0; i < (256+64+64) / 32; ++i) + { + //*d++ = *s++; + __m128i woot1 = _mm_load_si128((__m128i*)s); + __m128i woot2 = _mm_load_si128((__m128i*)s+1); + _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg)); + _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg)); + _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg)); + _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg)); + s += 32; + d += 32; + } + } + } + else + { + if (decoder.macroblock_modes & MACROBLOCK_PATTERN) + { + switch(ipu_cmd.pos[1]) + { + case 0: + { + // Get coded block pattern + const CBPtab* tab; + u16 code = UBITS(16); + + if (code >= 0x2000) + tab = CBP_7 + (UBITS(7) - 16); + else + tab = CBP_9 + UBITS(9); + + DUMPBITS(tab->len); + decoder.coded_block_pattern = tab->cbp; + } + [[fallthrough]]; + + case 1: + if (decoder.coded_block_pattern & 0x20) + { + if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1)) + { + ipu_cmd.pos[1] = 1; + return false; + } + } + [[fallthrough]]; + + case 2: + if (decoder.coded_block_pattern & 0x10) + { + if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) + { + ipu_cmd.pos[1] = 2; + return false; + } + } + [[fallthrough]]; + + case 3: + if (decoder.coded_block_pattern & 0x08) + { + if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) + { + ipu_cmd.pos[1] = 3; + return false; + } + } + [[fallthrough]]; + + case 4: + if (decoder.coded_block_pattern & 0x04) + { + if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) + { + ipu_cmd.pos[1] = 4; + return false; + } + } + [[fallthrough]]; + + case 5: + if (decoder.coded_block_pattern & 0x2) + { + if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) + { + 
ipu_cmd.pos[1] = 5; + return false; + } + } + [[fallthrough]]; + + case 6: + if (decoder.coded_block_pattern & 0x1) + { + if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6)) + { + ipu_cmd.pos[1] = 6; + return false; + } + } + break; + + jNO_DEFAULT; + } + } + } + + // Send The MacroBlock via DmaIpuFrom + ipuRegs.ctrl.SCD = 0; + coded_block_pattern = decoder.coded_block_pattern; + + decoder.SetOutputTo(mb16); + [[fallthrough]]; + + case 3: + { + pxAssert(decoder.ipu0_data > 0); + + uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); + decoder.AdvanceIpuDataBy(read); + + if (decoder.ipu0_data != 0) + { + // IPU FIFO filled up -- Will have to finish transferring later. + ipu_cmd.pos[0] = 3; + return false; + } + + mbaCount = 0; + if (read) + { + ipu_cmd.pos[0] = 4; + return false; + } + } + [[fallthrough]]; + + case 4: + { + u8 bit8; + u32 start_check; + if (!getBits8((u8*)&bit8, 0)) + { + ipu_cmd.pos[0] = 4; + return false; + } + + if (bit8 == 0) + { + g_BP.Align(); + do + { + if (!g_BP.FillBuffer(24)) + { + ipu_cmd.pos[0] = 4; + return false; + } + start_check = UBITS(24); + if (start_check != 0) + { + if (start_check == 1) + { + ipuRegs.ctrl.SCD = 1; + } + else + { + ipuRegs.ctrl.ECD = 1; + } + break; + } + DUMPBITS(8); + } while (1); + } + } + [[fallthrough]]; + + case 5: + if (!getBits32((u8*)&ipuRegs.top, 0)) + { + ipu_cmd.pos[0] = 5; + return false; + } + + ipuRegs.top = BigEndian(ipuRegs.top); + break; + } + + return true; +} + + ////////////////////////////////////////////////////// // IPU Commands (exec on worker thread only) -static __fi bool ipuVDEC(u32 val) +__fi static bool ipuVDEC(u32 val) { static int count = 0; if (count++ > 5) { @@ -39,7 +1594,7 @@ static __fi bool ipuVDEC(u32 val) switch (ipu_cmd.pos[0]) { case 0: - if (!bitstream_init()) return false; + if (!BitstreamInit()) return false; switch ((val >> 26) & 3) { @@ -51,15 +1606,42 @@ static __fi bool ipuVDEC(u32 val) case 
1://Macroblock Type decoder.frame_pred_frame_dct = 1; decoder.coding_type = ipuRegs.ctrl.PCT > 0 ? ipuRegs.ctrl.PCT : 1; // Kaiketsu Zorro Mezase doesn't set a Picture type, seems happy with I - ipuRegs.cmd.DATA = get_macroblock_modes(); + ipuRegs.cmd.DATA = GetMacroblockModes(); break; case 2://Motion Code - ipuRegs.cmd.DATA = get_motion_delta(0); + { + const u16 code = UBITS(16); + if ((code & 0x8000)) + { + DUMPBITS(1); + ipuRegs.cmd.DATA = 0x00010000; + } + else + { + const MVtab* tab; + if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00)) + tab = MV_4 + UBITS(4); + else + tab = MV_10 + UBITS(10); + + const int delta = tab->delta + 1; + DUMPBITS(tab->len); + + const int sign = SBITS(1); + DUMPBITS(1); + + ipuRegs.cmd.DATA = (((delta ^ sign) - sign) | (tab->len << 16)); + } + } break; case 3://DMVector - ipuRegs.cmd.DATA = get_dmv(); + { + const DMVtab* tab = DMV_2 + UBITS(2); + DUMPBITS(tab->len); + ipuRegs.cmd.DATA = (tab->dmv | (tab->len << 16)); + } break; jNO_DEFAULT @@ -100,7 +1682,7 @@ static __fi bool ipuVDEC(u32 val) return false; } -static __ri bool ipuFDEC(u32 val) +__ri static bool ipuFDEC(u32 val) { if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false; @@ -185,7 +1767,7 @@ static bool ipuSETVQ(u32 val) } // IPU Transfers are split into 8Qwords so we need to send ALL the data -static __ri bool ipuCSC(tIPU_CMD_CSC csc) +__ri static bool ipuCSC(tIPU_CMD_CSC csc) { csc.log_from_YCbCr(); @@ -217,7 +1799,7 @@ static __ri bool ipuCSC(tIPU_CMD_CSC csc) return true; } -static __ri bool ipuPACK(tIPU_CMD_CSC csc) +__ri static bool ipuPACK(tIPU_CMD_CSC csc) { csc.log_from_RGB32(); @@ -254,7 +1836,7 @@ static __ri bool ipuPACK(tIPU_CMD_CSC csc) // CORE Functions (referenced from MPEG library) // -------------------------------------------------------------------------------------- -__fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) +__fi static void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) { int i; u8* p = (u8*)&rgb32; 
@@ -288,7 +1870,7 @@ __fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn) } } -__fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) +__fi static void ipu_vq(macroblock_rgb16& rgb16, u8* indx4) { const auto closest_index = [&](int i, int j) { u8 index = 0; diff --git a/pcsx2/IPU/IPU_MultiISA.h b/pcsx2/IPU/IPU_MultiISA.h index d671e9a6f0..b56e07ae5c 100644 --- a/pcsx2/IPU/IPU_MultiISA.h +++ b/pcsx2/IPU/IPU_MultiISA.h @@ -15,13 +15,163 @@ #pragma once +#include "IPU/IPU.h" +#include "IPU/mpeg2_vlc.h" #include "GS/MultiISA.h" -#include "mpeg2lib/Mpeg.h" -MULTI_ISA_DEF(void IPUWorker();) +#ifdef _MSC_VER +#include +#endif + +#ifdef _MSC_VER +#define BigEndian(in) _byteswap_ulong(in) +#else +#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap... +#endif + +#ifdef _MSC_VER +#define BigEndian64(in) _byteswap_uint64(in) +#else +#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap... +#endif + +struct macroblock_8{ + u8 Y[16][16]; //0 + u8 Cb[8][8]; //1 + u8 Cr[8][8]; //2 +}; + +struct macroblock_16{ + s16 Y[16][16]; //0 + s16 Cb[8][8]; //1 + s16 Cr[8][8]; //2 +}; + +struct macroblock_rgb32{ + struct { + u8 r, g, b, a; + } c[16][16]; +}; + +struct rgb16_t{ + u16 r:5, g:5, b:5, a:1; +}; + +struct macroblock_rgb16{ + rgb16_t c[16][16]; +}; + +struct decoder_t { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* DCT coefficients - should be kept aligned ! 
*/ + s16 DCTblock[64]; + + u8 niq[64]; //non-intraquant matrix (sequence header) + u8 iq[64]; //intraquant matrix (sequence header) + + macroblock_8 mb8; + macroblock_16 mb16; + macroblock_rgb32 rgb32; + macroblock_rgb16 rgb16; + + uint ipu0_data; // amount of data in the output macroblock (in QWC) + uint ipu0_idx; + + int quantizer_scale; + + /* now non-slice-specific information */ + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int coding_type; + + /* picture coding extension stuff */ + + /* predictor for DC coefficients in intra blocks */ + s16 dc_dct_pred[3]; + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ + int concealment_motion_vectors; + /* bit to indicate which quantization table to use */ + int q_scale_type; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + // Pseudo Sign Offset + int sgn; + // Dither Enable + int dte; + // Output Format + int ofm; + // Macroblock type + int macroblock_modes; + // DC Reset + int dcr; + // Coded block pattern + int coded_block_pattern; + + /* stuff derived from bitstream */ + + /* the zigzag scan we're supposed to be using, true for alt, false for normal */ + bool scantype; + + int mpeg1; + + template< typename T > + void SetOutputTo( T& obj ) + { + uint mb_offset = ((uptr)&obj - (uptr)&mb8); + pxAssume( (mb_offset & 15) == 0 ); + ipu0_idx = mb_offset / 16; + ipu0_data = sizeof(obj)/16; + } + + u128* GetIpuDataPtr() + { + return ((u128*)&mb8) + ipu0_idx; + } + + void AdvanceIpuDataBy(uint amt) + { + pxAssertMsg(ipu0_data>=amt, "IPU FIFO Overflow on advance!" 
); + ipu0_idx += amt; + ipu0_data -= amt; + } +}; + +alignas(16) extern decoder_t decoder; +alignas(16) extern tIPU_BP g_BP; + +MULTI_ISA_DEF( + extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte); + + void IPUWorker(); +) // Quantization matrix extern rgb16_t g_ipu_vqclut[16]; //clut conversion table extern u16 g_ipu_thresh[2]; //thresholds for color conversions alignas(16) extern u8 g_ipu_indx4[16*16/2]; +alignas(16) extern const int non_linear_quantizer_scale[32]; +extern int coded_block_pattern; + +struct mpeg2_scan_pack +{ + u8 norm[64]; + u8 alt[64]; +}; + +alignas(16) extern const std::array g_idct_clip_lut; +alignas(16) extern const mpeg2_scan_pack mpeg2_scan; diff --git a/pcsx2/IPU/IPUdither.cpp b/pcsx2/IPU/IPUdither.cpp index a2d1f07877..a7bf620590 100644 --- a/pcsx2/IPU/IPUdither.cpp +++ b/pcsx2/IPU/IPUdither.cpp @@ -16,12 +16,10 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" -#include "IPUdma.h" -#include "yuv2rgb.h" -#include "mpeg2lib/Mpeg.h" - -#include "GS/MultiISA.h" +#include "IPU/IPU.h" +#include "IPU/IPUdma.h" +#include "IPU/yuv2rgb.h" +#include "IPU/IPU_MultiISA.h" MULTI_ISA_UNSHARED_START diff --git a/pcsx2/IPU/IPUdma.cpp b/pcsx2/IPU/IPUdma.cpp index e7e9f83d48..132b65859c 100644 --- a/pcsx2/IPU/IPUdma.cpp +++ b/pcsx2/IPU/IPUdma.cpp @@ -15,9 +15,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" +#include "IPU/IPU.h" #include "IPU/IPUdma.h" -#include "mpeg2lib/Mpeg.h" +#include "IPU/IPU_MultiISA.h" IPUStatus IPU1Status; bool CommandExecuteQueued; diff --git a/pcsx2/IPU/mpeg2_vlc.h b/pcsx2/IPU/mpeg2_vlc.h new file mode 100644 index 0000000000..997c8d68d4 --- /dev/null +++ b/pcsx2/IPU/mpeg2_vlc.h @@ -0,0 +1,485 @@ +/* + * vlc.h + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * Modified by Florin for PCSX2 emu + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
+ * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* NOTE: While part of this header is originally from libmpeg2, which is GPL - licensed, + * it's not substantial and does not contain any functions, therefore can be argued + * not to be a derived work. See http://lkml.iu.edu/hypermail/linux/kernel/0301.1/0362.html + * The constants themselves can also be argued to be part of the MPEG-2 standard, whose + * patents expired worldwide in Feb 2020. 
+ */ + +#pragma once +#include <cstdint> + +#ifdef _MSC_VER +#define VLC_ALIGNED16 __declspec(align(16)) +#else +#define VLC_ALIGNED16 __attribute__((aligned(16))) +#endif + +enum macroblock_modes +{ + MACROBLOCK_INTRA = 1, + MACROBLOCK_PATTERN = 2, + MACROBLOCK_MOTION_BACKWARD = 4, + MACROBLOCK_MOTION_FORWARD = 8, + MACROBLOCK_QUANT = 16, + DCT_TYPE_INTERLACED = 32 +}; + +enum motion_type +{ + MOTION_TYPE_SHIFT = 6, + MOTION_TYPE_MASK = (3 * 64), + MOTION_TYPE_BASE = 64, + MC_FIELD = (1 * 64), + MC_FRAME = (2 * 64), + MC_16X8 = (2 * 64), + MC_DMV = (3 * 64) +}; + +/* picture structure */ +enum picture_structure +{ + TOP_FIELD = 1, + BOTTOM_FIELD = 2, + FRAME_PICTURE = 3 +}; + +/* picture coding type */ +enum picture_coding_type +{ + I_TYPE = 1, + P_TYPE = 2, + B_TYPE = 3, + D_TYPE = 4 +}; + +struct MBtab +{ + std::uint8_t modes; + std::uint8_t len; +}; + +struct MVtab +{ + std::uint8_t delta; + std::uint8_t len; +}; + +struct DMVtab +{ + std::int8_t dmv; + std::uint8_t len; +}; + +struct CBPtab +{ + std::uint8_t cbp; + std::uint8_t len; +}; + +struct DCtab +{ + std::uint8_t size; + std::uint8_t len; +}; + +struct DCTtab +{ + std::uint8_t run; + std::uint8_t level; + std::uint8_t len; +}; + +struct MBAtab +{ + std::uint8_t mba; + std::uint8_t len; +}; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static constexpr MBtab MB_I[] = { + {INTRA | QUANT, 2}, {INTRA, 1}}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static constexpr VLC_ALIGNED16 MBtab MB_P[] = { + {INTRA | QUANT, 6}, {CODED | QUANT, 5}, {MC | CODED | QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, + {MC | CODED, 1}, {MC | CODED, 1}, {MC | 
CODED, 1}, {MC | CODED, 1}}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD + +static constexpr VLC_ALIGNED16 MBtab MB_B[] = { + {0, 0}, {INTRA | QUANT, 6}, + {BWD | CODED | QUANT, 6}, {FWD | CODED | QUANT, 6}, + {INTER | CODED | QUANT, 5}, {INTER | CODED | QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD | CODED, 4}, {FWD | CODED, 4}, {FWD | CODED, 4}, {FWD | CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, + {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, + {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static constexpr MVtab MV_4[] = { + {3, 6}, {2, 4}, {1, 3}, {1, 3}, {0, 2}, {0, 2}, {0, 2}, {0, 2}}; + +static constexpr VLC_ALIGNED16 MVtab MV_10[] = { + {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, + {0, 10}, {0, 10}, {0, 10}, {0, 10}, {15, 10}, {14, 10}, {13, 10}, {12, 10}, + {11, 10}, {10, 10}, {9, 9}, {9, 9}, {8, 9}, {8, 9}, {7, 9}, {7, 9}, + {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, + {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, + {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}}; + + +static constexpr DMVtab DMV_2[] = { + {0, 1}, 
{0, 1}, {1, 2}, {-1, 2}}; + + +static constexpr VLC_ALIGNED16 CBPtab CBP_7[] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}}; + +static constexpr VLC_ALIGNED16 CBPtab CBP_9[] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, 
{0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}}; + +struct MBAtabSet +{ + MBAtab mba5[30]; + MBAtab mba11[26 * 4]; +}; +static constexpr VLC_ALIGNED16 MBAtabSet MBA = { + {// mba5 + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}}, + + {// mba11 + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {9, 8}, {9, 8}, {9, 8}, {9, 8}, + {9, 8}, {9, 8}, {9, 8}, {9, 8}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}}}; + +struct DCtabSet +{ + DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 + DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 + DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 + DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 
1111111111 +}; + +static constexpr VLC_ALIGNED16 DCtabSet DCtable = + { + /* lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */ + {{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0}}, + + /* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */ + {{7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}}, + + /* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */ + {{0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0}}, + + /* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */ + {{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, + {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, + {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, + {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10, 10}, {11, 10}}, +}; + +struct DCTtabSet +{ + DCTtab first[12]; + DCTtab next[12]; + + DCTtab tab0[60]; + DCTtab tab0a[252]; + DCTtab tab1[8]; + DCTtab tab1a[8]; + + DCTtab tab2[16]; + DCTtab tab3[16]; + DCTtab tab4[16]; + DCTtab tab5[16]; + DCTtab tab6[16]; +}; + +static constexpr VLC_ALIGNED16 DCTtabSet DCT = + { + /* first[12]: Table B-14, DCT coefficients table zero, + * codes 0100 ... 
1xxx (used for all other coefficients) + */ + {{0, 2, 4}, {2, 1, 4}, {1, 1, 3}, {1, 1, 3}, + {64, 0, 2}, {64, 0, 2}, {64, 0, 2}, {64, 0, 2}, /* EOB */ + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}}, + + /* tab0[60]: Table B-14, DCT coefficients table zero, + * codes 000001xx ... 00111xxx + */ + {{65, 0, 6}, {65, 0, 6}, {65, 0, 6}, {65, 0, 6}, /* Escape */ + {2, 2, 7}, {2, 2, 7}, {9, 1, 7}, {9, 1, 7}, + {0, 4, 7}, {0, 4, 7}, {8, 1, 7}, {8, 1, 7}, + {7, 1, 6}, {7, 1, 6}, {7, 1, 6}, {7, 1, 6}, + {6, 1, 6}, {6, 1, 6}, {6, 1, 6}, {6, 1, 6}, + {1, 2, 6}, {1, 2, 6}, {1, 2, 6}, {1, 2, 6}, + {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, + {13, 1, 8}, {0, 6, 8}, {12, 1, 8}, {11, 1, 8}, + {3, 2, 8}, {1, 3, 8}, {0, 5, 8}, {10, 1, 8}, + {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, + {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, {0, 3, 5}, + {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, + {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, {4, 1, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}}, + + /* tab0a[252]: Table B-15, DCT coefficients table one, + * codes 000001xx ... 
11111111 + */ + {{65, 0, 6}, {65, 0, 6}, {65, 0, 6}, {65, 0, 6}, /* Escape */ + {7, 1, 7}, {7, 1, 7}, {8, 1, 7}, {8, 1, 7}, + {6, 1, 7}, {6, 1, 7}, {2, 2, 7}, {2, 2, 7}, + {0, 7, 6}, {0, 7, 6}, {0, 7, 6}, {0, 7, 6}, + {0, 6, 6}, {0, 6, 6}, {0, 6, 6}, {0, 6, 6}, + {4, 1, 6}, {4, 1, 6}, {4, 1, 6}, {4, 1, 6}, + {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, {5, 1, 6}, + {1, 5, 8}, {11, 1, 8}, {0, 11, 8}, {0, 10, 8}, + {13, 1, 8}, {12, 1, 8}, {3, 2, 8}, {1, 4, 8}, + {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, + {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, {2, 1, 5}, + {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, + {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, {1, 2, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, + {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3}, + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, /* EOB */ + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, + {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + 
{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3}, + {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, + {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, {0, 4, 5}, + {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, + {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, {0, 5, 5}, + {9, 1, 7}, {9, 1, 7}, {1, 3, 7}, {1, 3, 7}, + {10, 1, 7}, {10, 1, 7}, {0, 8, 7}, {0, 8, 7}, + {0, 9, 7}, {0, 9, 7}, {0, 12, 8}, {0, 13, 8}, + {2, 3, 8}, {4, 2, 8}, {0, 14, 8}, {0, 15, 8}}, + + /* Table B-14, DCT coefficients table zero, + * codes 0000001000 ... 0000001111 + */ + {{16, 1, 10}, {5, 2, 10}, {0, 7, 10}, {2, 3, 10}, + {1, 4, 10}, {15, 1, 10}, {14, 1, 10}, {4, 2, 10}}, + + /* Table B-15, DCT coefficients table one, + * codes 000000100x ... 000000111x + */ + {{5, 2, 9}, {5, 2, 9}, {14, 1, 9}, {14, 1, 9}, + {2, 4, 10}, {16, 1, 10}, {15, 1, 9}, {15, 1, 9}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 000000010000 ... 000000011111 + */ + {{0, 11, 12}, {8, 2, 12}, {4, 3, 12}, {0, 10, 12}, + {2, 4, 12}, {7, 2, 12}, {21, 1, 12}, {20, 1, 12}, + {0, 9, 12}, {19, 1, 12}, {18, 1, 12}, {1, 5, 12}, + {3, 3, 12}, {0, 8, 12}, {6, 2, 12}, {17, 1, 12}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 0000000010000 ... 
0000000011111 + */ + {{10, 2, 13}, {9, 2, 13}, {5, 3, 13}, {3, 4, 13}, + {2, 5, 13}, {1, 7, 13}, {1, 6, 13}, {0, 15, 13}, + {0, 14, 13}, {0, 13, 13}, {0, 12, 13}, {26, 1, 13}, + {25, 1, 13}, {24, 1, 13}, {23, 1, 13}, {22, 1, 13}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 00000000010000 ... 00000000011111 + */ + {{0, 31, 14}, {0, 30, 14}, {0, 29, 14}, {0, 28, 14}, + {0, 27, 14}, {0, 26, 14}, {0, 25, 14}, {0, 24, 14}, + {0, 23, 14}, {0, 22, 14}, {0, 21, 14}, {0, 20, 14}, + {0, 19, 14}, {0, 18, 14}, {0, 17, 14}, {0, 16, 14}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 000000000010000 ... 000000000011111 + */ + {{0, 40, 15}, {0, 39, 15}, {0, 38, 15}, {0, 37, 15}, + {0, 36, 15}, {0, 35, 15}, {0, 34, 15}, {0, 33, 15}, + {0, 32, 15}, {1, 14, 15}, {1, 13, 15}, {1, 12, 15}, + {1, 11, 15}, {1, 10, 15}, {1, 9, 15}, {1, 8, 15}}, + + /* Table B-14/15, DCT coefficients table zero / one, + * codes 0000000000010000 ... 0000000000011111 + */ + {{1, 18, 16}, {1, 17, 16}, {1, 16, 16}, {1, 15, 16}, + {6, 3, 16}, {16, 2, 16}, {15, 2, 16}, {14, 2, 16}, + {13, 2, 16}, {12, 2, 16}, {11, 2, 16}, {31, 1, 16}, + {30, 1, 16}, {29, 1, 16}, {28, 1, 16}, {27, 1, 16}} + +}; + +#undef VLC_ALIGNED16 \ No newline at end of file diff --git a/pcsx2/IPU/mpeg2lib/Idct.cpp b/pcsx2/IPU/mpeg2lib/Idct.cpp deleted file mode 100644 index dd1684b989..0000000000 --- a/pcsx2/IPU/mpeg2lib/Idct.cpp +++ /dev/null @@ -1,271 +0,0 @@ -/* - * idct.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// [TODO] : There are modern SSE versions of idct (idct_mmx.c) in the mpeg2 libs that we -// should probably upgrade to. They use their own raw-style intrinsics and not the intel -// compiler-integrated ones. - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "IPU/IPU.h" -#include "Mpeg.h" - -#include - -#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ -#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ -#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ -#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ -#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ -#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ - -/* - * In legal streams, the IDCT output should be between -384 and +384. - * In corrupted streams, it is possible to force the IDCT output to go - * to +-3826 - this is the worst case for a column IDCT where the - * column inputs are 16-bit values. 
- */ -alignas(16) extern const std::array<u8, 1024> g_idct_clip_lut; - -#define CLIP(i) ((g_idct_clip_lut.data()+384)[(i)]) - -MULTI_ISA_UNSHARED_START - -static __fi void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1) -{ -#if 0 - t0 = w0*d0 + w1*d1; - t1 = w0*d1 - w1*d0; -#else - int tmp = w0 * (d0 + d1); - t0 = tmp + (w1 - w0) * d1; - t1 = tmp - (w1 + w0) * d0; -#endif -} - -static __fi void idct_row (s16 * const block) -{ - int d0, d1, d2, d3; - int a0, a1, a2, a3, b0, b1, b2, b3; - int t0, t1, t2, t3; - - /* shortcut */ - if (!(block[1] | ((s32 *)block)[1] | ((s32 *)block)[2] | - ((s32 *)block)[3])) { - u32 tmp = (u16) (block[0] << 3); - tmp |= tmp << 16; - ((s32 *)block)[0] = tmp; - ((s32 *)block)[1] = tmp; - ((s32 *)block)[2] = tmp; - ((s32 *)block)[3] = tmp; - return; - } - - d0 = (block[0] << 11) + 128; - d1 = block[1]; - d2 = block[2] << 11; - d3 = block[3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[4]; - d1 = block[5]; - d2 = block[6]; - d3 = block[7]; - BUTTERFLY (t0, t1, W7, W1, d3, d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 -= t2; - t1 -= t3; - b1 = ((t0 + t1) * 181) >> 8; - b2 = ((t0 - t1) * 181) >> 8; - - block[0] = (a0 + b0) >> 8; - block[1] = (a1 + b1) >> 8; - block[2] = (a2 + b2) >> 8; - block[3] = (a3 + b3) >> 8; - block[4] = (a3 - b3) >> 8; - block[5] = (a2 - b2) >> 8; - block[6] = (a1 - b1) >> 8; - block[7] = (a0 - b0) >> 8; -} - -static __fi void idct_col (s16 * const block) -{ - int d0, d1, d2, d3; - int a0, a1, a2, a3, b0, b1, b2, b3; - int t0, t1, t2, t3; - - d0 = (block[8*0] << 11) + 65536; - d1 = block[8*1]; - d2 = block[8*2] << 11; - d3 = block[8*3]; - t0 = d0 + d2; - t1 = d0 - d2; - BUTTERFLY (t2, t3, W6, W2, d3, d1); - a0 = t0 + t2; - a1 = t1 + t3; - a2 = t1 - t3; - a3 = t0 - t2; - - d0 = block[8*4]; - d1 = block[8*5]; - d2 = block[8*6]; - d3 = block[8*7]; - BUTTERFLY (t0, t1, W7, W1, d3, 
d0); - BUTTERFLY (t2, t3, W3, W5, d1, d2); - b0 = t0 + t2; - b3 = t1 + t3; - t0 = (t0 - t2) >> 8; - t1 = (t1 - t3) >> 8; - b1 = (t0 + t1) * 181; - b2 = (t0 - t1) * 181; - - block[8*0] = (a0 + b0) >> 17; - block[8*1] = (a1 + b1) >> 17; - block[8*2] = (a2 + b2) >> 17; - block[8*3] = (a3 + b3) >> 17; - block[8*4] = (a3 - b3) >> 17; - block[8*5] = (a2 - b2) >> 17; - block[8*6] = (a1 - b1) >> 17; - block[8*7] = (a0 - b0) >> 17; -} - -__ri void mpeg2_idct_copy(s16 * block, u8 * dest, const int stride) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - - __m128 zero = _mm_setzero_ps(); - do { - dest[0] = CLIP (block[0]); - dest[1] = CLIP (block[1]); - dest[2] = CLIP (block[2]); - dest[3] = CLIP (block[3]); - dest[4] = CLIP (block[4]); - dest[5] = CLIP (block[5]); - dest[6] = CLIP (block[6]); - dest[7] = CLIP (block[7]); - - _mm_store_ps((float*)block, zero); - - dest += stride; - block += 8; - } while (--i); -} - - -// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]). -__ri void mpeg2_idct_add (const int last, s16 * block, s16 * dest, const int stride) -{ - // on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0). 
- - if (last != 129 || (block[0] & 7) == 4) - { - int i; - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - for (i = 0; i < 8; i++) - idct_col (block + i); - - __m128 zero = _mm_setzero_ps(); - do { - _mm_store_ps((float*)dest, _mm_load_ps((float*)block)); - _mm_store_ps((float*)block, zero); - - dest += stride; - block += 8; - } while (--i); - - } - else - { - s16 DC = ((int)block[0] + 4) >> 3; - s16 dcf[2] = { DC, DC }; - block[0] = block[63] = 0; - - __m128 dc128 = _mm_set_ps1(*(float*)dcf); - - for(int i=0; i<8; ++i) - _mm_store_ps((float*)(dest+(stride*i)), dc128); - } -} - -MULTI_ISA_UNSHARED_END - -#if MULTI_ISA_COMPILE_ONCE - -static constexpr std::array<u8, 1024> make_clip_lut() -{ - std::array<u8, 1024> lut = {}; - for (int i = -384; i < 640; i++) - lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); - return lut; -} - -static constexpr mpeg2_scan_pack make_scan_pack() -{ - constexpr u8 mpeg2_scan_norm[64] = { - /* Zig-Zag scan pattern */ - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 - }; - - constexpr u8 mpeg2_scan_alt[64] = { - /* Alternate scan pattern */ - 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, - 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, - 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, - 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 - }; - - mpeg2_scan_pack pack = {}; - - for (int i = 0; i < 64; i++) { - int j = mpeg2_scan_norm[i]; - pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - j = mpeg2_scan_alt[i]; - pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); - } - - return pack; -} - -alignas(16) constexpr std::array<u8, 1024> g_idct_clip_lut = make_clip_lut(); -alignas(16) constexpr mpeg2_scan_pack mpeg2_scan = make_scan_pack(); - -#endif diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp 
b/pcsx2/IPU/mpeg2lib/Mpeg.cpp deleted file mode 100644 index 664f69c7a0..0000000000 --- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp +++ /dev/null @@ -1,1285 +0,0 @@ -/* - * Mpeg.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// [Air] Note: many functions in this module are large and only used once, so they -// have been forced to inline since it won't bloat the program and gets rid of -// some call overhead. - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "IPU/IPU.h" -#include "Mpeg.h" -#include "Vlc.h" - -#include "GS/MultiISA.h" - -#include "common/MemsetFast.inl" - -#if MULTI_ISA_COMPILE_ONCE - -const int non_linear_quantizer_scale [] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 -}; - -#endif - -MULTI_ISA_UNSHARED_START - -/* Bitstream and buffer needs to be reallocated in order for successful - reading of the old data. 
Here the old data stored in the 2nd slot - of the internal buffer is copied to 1st slot, and the new data read - into 1st slot is copied to the 2nd slot. Which will later be copied - back to the 1st slot when 128bits have been read. -*/ -const DCTtab * tab; -int mbaCount = 0; - -int bitstream_init () -{ - return g_BP.FillBuffer(32); -} - -int get_macroblock_modes() -{ - int macroblock_modes; - const MBtab * tab; - - switch (decoder.coding_type) - { - case I_TYPE: - macroblock_modes = UBITS(2); - - if (macroblock_modes == 0) return 0; // error - - tab = MB_I + (macroblock_modes >> 1); - DUMPBITS(tab->len); - macroblock_modes = tab->modes; - - if ((!(decoder.frame_pred_frame_dct)) && - (decoder.picture_structure == FRAME_PICTURE)) - { - macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; - } - return macroblock_modes; - - case P_TYPE: - macroblock_modes = UBITS(6); - - if (macroblock_modes == 0) return 0; // error - - tab = MB_P + (macroblock_modes >> 1); - DUMPBITS(tab->len); - macroblock_modes = tab->modes; - - if (decoder.picture_structure != FRAME_PICTURE) - { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - { - macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; - } - - return macroblock_modes; - } - else if (decoder.frame_pred_frame_dct) - { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - - return macroblock_modes; - } - else - { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - { - macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; - } - - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) - { - macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; - } - - return macroblock_modes; - } - - case B_TYPE: - macroblock_modes = UBITS(6); - - if (macroblock_modes == 0) return 0; // error - - tab = MB_B + macroblock_modes; - DUMPBITS(tab->len); - macroblock_modes = tab->modes; - - if (decoder.picture_structure != FRAME_PICTURE) - { - if (!(macroblock_modes & MACROBLOCK_INTRA)) - { - macroblock_modes |= 
GETBITS(2) * MOTION_TYPE_BASE; - } - return (macroblock_modes | (tab->len << 16)); - } - else if (decoder.frame_pred_frame_dct) - { - /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME; - return (macroblock_modes | (tab->len << 16)); - } - else - { - if (macroblock_modes & MACROBLOCK_INTRA) goto intra; - - macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE; - - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) - { -intra: - macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED; - } - return (macroblock_modes | (tab->len << 16)); - } - - case D_TYPE: - macroblock_modes = GETBITS(1); - //I suspect (as this is actually a 2 bit command) that this should be getbits(2) - //additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction) - DevCon.Warning(" Rare MPEG command! "); - if (macroblock_modes == 0) return 0; // error - return (MACROBLOCK_INTRA | (1 << 16)); - - default: - return 0; - } -} - -static __fi int get_quantizer_scale() -{ - int quantizer_scale_code; - - quantizer_scale_code = GETBITS(5); - - if (decoder.q_scale_type) - return non_linear_quantizer_scale [quantizer_scale_code]; - else - return quantizer_scale_code << 1; -} - -static __fi int get_coded_block_pattern() -{ - const CBPtab * tab; - u16 code = UBITS(16); - - if (code >= 0x2000) - tab = CBP_7 + (UBITS(7) - 16); - else - tab = CBP_9 + UBITS(9); - - DUMPBITS(tab->len); - return tab->cbp; -} - -int __fi get_motion_delta(const int f_code) -{ - int delta; - int sign; - const MVtab * tab; - u16 code = UBITS(16); - - if ((code & 0x8000)) - { - DUMPBITS(1); - return 0x00010000; - } - else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00)) - { - tab = MV_4 + UBITS(4); - } - else - { - tab = MV_10 + UBITS(10); - } - - delta = tab->delta + 1; - DUMPBITS(tab->len); - - sign = SBITS(1); - DUMPBITS(1); - - return (((delta ^ sign) - sign) | (tab->len << 16)); -} - -int __fi get_dmv() -{ - const DMVtab* tab = DMV_2 + UBITS(2); 
- DUMPBITS(tab->len); - return (tab->dmv | (tab->len << 16)); -} - -int get_macroblock_address_increment() -{ - const MBAtab *mba; - - u16 code = UBITS(16); - - if (code >= 4096) - mba = MBA.mba5 + (UBITS(5) - 2); - else if (code >= 768) - mba = MBA.mba11 + (UBITS(11) - 24); - else switch (UBITS(11)) - { - case 8: /* macroblock_escape */ - DUMPBITS(11); - return 0xb0023; - - case 15: /* macroblock_stuffing (MPEG1 only) */ - if (decoder.mpeg1) - { - DUMPBITS(11); - return 0xb0022; - } - [[fallthrough]]; - - default: - return 0;//error - } - - DUMPBITS(mba->len); - - return ((mba->mba + 1) | (mba->len << 16)); -} - -static __fi int get_luma_dc_dct_diff() -{ - int size; - int dc_diff; - u16 code = UBITS(5); - - if (code < 31) - { - size = DCtable.lum0[code].size; - DUMPBITS(DCtable.lum0[code].len); - - // 5 bits max - } - else - { - code = UBITS(9) - 0x1f0; - size = DCtable.lum1[code].size; - DUMPBITS(DCtable.lum1[code].len); - - // 9 bits max - } - - if (size==0) - dc_diff = 0; - else - { - dc_diff = GETBITS(size); - - // 6 for tab0 and 11 for tab1 - if ((dc_diff & (1<<(size-1)))==0) - dc_diff-= (1< 4095) - val = (val >> 31) ^ 2047; -} - -static bool get_intra_block() -{ - const u8 * scan = decoder.scantype ? 
mpeg2_scan.alt : mpeg2_scan.norm; - const u8 (&quant_matrix)[64] = decoder.iq; - int quantizer_scale = decoder.quantizer_scale; - s16 * dest = decoder.DCTblock; - u16 code; - - /* decode AC coefficients */ - for (int i=1 + ipu_cmd.pos[4]; ; i++) - { - switch (ipu_cmd.pos[5]) - { - case 0: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i - 1; - return false; - } - - code = UBITS(16); - - if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1)) - { - tab = &DCT.next[(code >> 12) - 4]; - } - else if (code >= 1024) - { - if (decoder.intra_vlc_format && !decoder.mpeg1) - { - tab = &DCT.tab0a[(code >> 8) - 4]; - } - else - { - tab = &DCT.tab0[(code >> 8) - 4]; - } - } - else if (code >= 512) - { - if (decoder.intra_vlc_format && !decoder.mpeg1) - { - tab = &DCT.tab1a[(code >> 6) - 8]; - } - else - { - tab = &DCT.tab1[(code >> 6) - 8]; - } - } - - // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup - // that should use a single unrolled DCT table instead of five separate tables used - // here. Multiple conditional statements are very slow, while modern CPU data caches - // have lots of room to spare. - - else if (code >= 256) - { - tab = &DCT.tab2[(code >> 4) - 16]; - } - else if (code >= 128) - { - tab = &DCT.tab3[(code >> 3) - 16]; - } - else if (code >= 64) - { - tab = &DCT.tab4[(code >> 2) - 16]; - } - else if (code >= 32) - { - tab = &DCT.tab5[(code >> 1) - 16]; - } - else if (code >= 16) - { - tab = &DCT.tab6[code - 16]; - } - else - { - ipu_cmd.pos[4] = 0; - return true; - } - - DUMPBITS(tab->len); - - if (tab->run==64) /* end_of_block */ - { - ipu_cmd.pos[4] = 0; - return true; - } - - i += (tab->run == 65) ? 
GETBITS(6) : tab->run; - if (i >= 64) - { - ipu_cmd.pos[4] = 0; - return true; - } - [[fallthrough]]; - - case 1: - { - if (!GETWORD()) - { - ipu_cmd.pos[4] = i - 1; - ipu_cmd.pos[5] = 1; - return false; - } - - uint j = scan[i]; - int val; - - if (tab->run==65) /* escape */ - { - if(!decoder.mpeg1) - { - val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4; - DUMPBITS(12); - } - else - { - val = SBITS(8); - DUMPBITS(8); - - if (!(val & 0x7f)) - { - val = GETBITS(8) + 2 * val; - } - - val = (val * quantizer_scale * quant_matrix[i]) >> 4; - val = (val + ~ (((s32)val) >> 31)) | 1; - } - } - else - { - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; - if(decoder.mpeg1) - { - /* oddification */ - val = (val - 1) | 1; - } - - /* if (bitstream_get (1)) val = -val; */ - int bit1 = SBITS(1); - val = (val ^ bit1) - bit1; - DUMPBITS(1); - } - - SATURATE(val); - dest[j] = val; - ipu_cmd.pos[5] = 0; - } - } - } - - ipu_cmd.pos[4] = 0; - return true; -} - -static bool get_non_intra_block(int * last) -{ - int i; - int j; - int val; - const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm; - const u8 (&quant_matrix)[64] = decoder.niq; - int quantizer_scale = decoder.quantizer_scale; - s16 * dest = decoder.DCTblock; - u16 code; - - /* decode AC coefficients */ - for (i= ipu_cmd.pos[4] ; ; i++) - { - switch (ipu_cmd.pos[5]) - { - case 0: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i; - return false; - } - - code = UBITS(16); - - if (code >= 16384) - { - if (i==0) - { - tab = &DCT.first[(code >> 12) - 4]; - } - else - { - tab = &DCT.next[(code >> 12)- 4]; - } - } - else if (code >= 1024) - { - tab = &DCT.tab0[(code >> 8) - 4]; - } - else if (code >= 512) - { - tab = &DCT.tab1[(code >> 6) - 8]; - } - - // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup - // that should use a single unrolled DCT table instead of five separate tables used - // here. 
Multiple conditional statements are very slow, while modern CPU data caches - // have lots of room to spare. - - else if (code >= 256) - { - tab = &DCT.tab2[(code >> 4) - 16]; - } - else if (code >= 128) - { - tab = &DCT.tab3[(code >> 3) - 16]; - } - else if (code >= 64) - { - tab = &DCT.tab4[(code >> 2) - 16]; - } - else if (code >= 32) - { - tab = &DCT.tab5[(code >> 1) - 16]; - } - else if (code >= 16) - { - tab = &DCT.tab6[code - 16]; - } - else - { - ipu_cmd.pos[4] = 0; - return true; - } - - DUMPBITS(tab->len); - - if (tab->run==64) /* end_of_block */ - { - *last = i; - ipu_cmd.pos[4] = 0; - return true; - } - - i += (tab->run == 65) ? GETBITS(6) : tab->run; - if (i >= 64) - { - *last = i; - ipu_cmd.pos[4] = 0; - return true; - } - [[fallthrough]]; - - case 1: - if (!GETWORD()) - { - ipu_cmd.pos[4] = i; - ipu_cmd.pos[5] = 1; - return false; - } - - j = scan[i]; - - if (tab->run==65) /* escape */ - { - if (!decoder.mpeg1) - { - val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5; - DUMPBITS(12); - } - else - { - val = SBITS(8); - DUMPBITS(8); - - if (!(val & 0x7f)) - { - val = GETBITS(8) + 2 * val; - } - - val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32; - val = (val + ~ (((s32)val) >> 31)) | 1; - } - } - else - { - int bit1 = SBITS(1); - val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5; - val = (val ^ bit1) - bit1; - DUMPBITS(1); - } - - SATURATE(val); - dest[j] = val; - ipu_cmd.pos[5] = 0; - } - } - - ipu_cmd.pos[4] = 0; - return true; -} - -static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) -{ - if (!skip || ipu_cmd.pos[3]) - { - ipu_cmd.pos[3] = 0; - if (!GETWORD()) - { - ipu_cmd.pos[3] = 1; - return false; - } - - /* Get the intra DC coefficient and inverse quantize it */ - if (cc == 0) - decoder.dc_dct_pred[0] += get_luma_dc_dct_diff(); - else - decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff(); - - decoder.DCTblock[0] 
= decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision); - } - - if (!get_intra_block()) - { - return false; - } - - mpeg2_idct_copy(decoder.DCTblock, dest, stride); - - return true; -} - -static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) -{ - int last; - - if (!skip) - { - memzero_sse_a(decoder.DCTblock); - } - - if (!get_non_intra_block(&last)) - { - return false; - } - - mpeg2_idct_add(last, decoder.DCTblock, dest, stride); - - return true; -} - -void __fi finishmpeg2sliceIDEC() -{ - ipuRegs.ctrl.SCD = 0; - coded_block_pattern = decoder.coded_block_pattern; -} - -__fi bool mpeg2sliceIDEC() -{ - u16 code; - - switch (ipu_cmd.pos[0]) - { - case 0: - decoder.dc_dct_pred[0] = - decoder.dc_dct_pred[1] = - decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; - - ipuRegs.top = 0; - ipuRegs.ctrl.ECD = 0; - [[fallthrough]]; - - case 1: - ipu_cmd.pos[0] = 1; - if (!bitstream_init()) - { - return false; - } - [[fallthrough]]; - - case 2: - ipu_cmd.pos[0] = 2; - while (1) - { - // IPU0 isn't ready for data, so let's wait for it to be - if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[1] <= 2) - { - return false; - } - macroblock_8& mb8 = decoder.mb8; - macroblock_rgb16& rgb16 = decoder.rgb16; - macroblock_rgb32& rgb32 = decoder.rgb32; - - int DCT_offset, DCT_stride; - const MBAtab * mba; - - switch (ipu_cmd.pos[1]) - { - case 0: - decoder.macroblock_modes = get_macroblock_modes(); - - if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC - { - decoder.quantizer_scale = get_quantizer_scale(); - } - - decoder.coded_block_pattern = 0x3F;//all 6 blocks - memzero_sse_a(mb8); - memzero_sse_a(rgb32); - [[fallthrough]]; - - case 1: - ipu_cmd.pos[1] = 1; - - if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) - { - DCT_offset = decoder_stride; - DCT_stride = decoder_stride * 2; - } - else - { - DCT_offset = decoder_stride * 8; - DCT_stride = decoder_stride; - } - - switch (ipu_cmd.pos[2]) - { - 
case 0: - case 1: - if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1)) - { - ipu_cmd.pos[2] = 1; - return false; - } - [[fallthrough]]; - - case 2: - if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2)) - { - ipu_cmd.pos[2] = 2; - return false; - } - [[fallthrough]]; - - case 3: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3)) - { - ipu_cmd.pos[2] = 3; - return false; - } - [[fallthrough]]; - - case 4: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4)) - { - ipu_cmd.pos[2] = 4; - return false; - } - [[fallthrough]]; - - case 5: - if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5)) - { - ipu_cmd.pos[2] = 5; - return false; - } - [[fallthrough]]; - - case 6: - if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6)) - { - ipu_cmd.pos[2] = 6; - return false; - } - break; - - jNO_DEFAULT; - } - - // Send The MacroBlock via DmaIpuFrom - ipu_csc(mb8, rgb32, decoder.sgn); - - if (decoder.ofm == 0) - decoder.SetOutputTo(rgb32); - else - { - ipu_dither(rgb32, rgb16, decoder.dte); - decoder.SetOutputTo(rgb16); - } - [[fallthrough]]; - - case 2: - { - - pxAssert(decoder.ipu0_data > 0); - - uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); - decoder.AdvanceIpuDataBy(read); - - if (decoder.ipu0_data != 0) - { - // IPU FIFO filled up -- Will have to finish transferring later. 
- ipu_cmd.pos[1] = 2; - return false; - } - - mbaCount = 0; - if (read) - { - ipu_cmd.pos[1] = 3; - return false; - } - } - [[fallthrough]]; - - case 3: - while (1) - { - if (!GETWORD()) - { - ipu_cmd.pos[1] = 3; - return false; - } - - code = UBITS(16); - if (code >= 0x1000) - { - mba = MBA.mba5 + (UBITS(5) - 2); - break; - } - else if (code >= 0x0300) - { - mba = MBA.mba11 + (UBITS(11) - 24); - break; - } - else switch (UBITS(11)) - { - case 8: /* macroblock_escape */ - mbaCount += 33; - [[fallthrough]]; - - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS(11); - continue; - - default: /* end of slice/frame, or error? */ - { - goto finish_idec; - } - } - } - - DUMPBITS(mba->len); - mbaCount += mba->mba; - - if (mbaCount) - { - decoder.dc_dct_pred[0] = - decoder.dc_dct_pred[1] = - decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; - } - [[fallthrough]]; - - case 4: - if (!GETWORD()) - { - ipu_cmd.pos[1] = 4; - return false; - } - break; - - jNO_DEFAULT; - } - - ipu_cmd.pos[1] = 0; - ipu_cmd.pos[2] = 0; - } - -finish_idec: - finishmpeg2sliceIDEC(); - [[fallthrough]]; - - case 3: - { - u8 bit8; - u32 start_check; - if (!getBits8((u8*)&bit8, 0)) - { - ipu_cmd.pos[0] = 3; - return false; - } - - if (bit8 == 0) - { - g_BP.Align(); - do - { - if (!g_BP.FillBuffer(24)) - { - ipu_cmd.pos[0] = 3; - return false; - } - start_check = UBITS(24); - if (start_check != 0) - { - if (start_check == 1) - { - ipuRegs.ctrl.SCD = 1; - } - else - { - ipuRegs.ctrl.ECD = 1; - } - break; - } - DUMPBITS(8); - } while (1); - } - } - [[fallthrough]]; - - case 4: - if (!getBits32((u8*)&ipuRegs.top, 0)) - { - ipu_cmd.pos[0] = 4; - return false; - } - - ipuRegs.top = BigEndian(ipuRegs.top); - break; - - jNO_DEFAULT; - } - - return true; -} - -__fi bool mpeg2_slice() -{ - int DCT_offset, DCT_stride; - - macroblock_8& mb8 = decoder.mb8; - macroblock_16& mb16 = decoder.mb16; - - switch (ipu_cmd.pos[0]) - { - case 0: - if (decoder.dcr) - { - decoder.dc_dct_pred[0] = - 
decoder.dc_dct_pred[1] = - decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; - } - - ipuRegs.ctrl.ECD = 0; - ipuRegs.top = 0; - memzero_sse_a(mb8); - memzero_sse_a(mb16); - [[fallthrough]]; - - case 1: - if (!bitstream_init()) - { - ipu_cmd.pos[0] = 1; - return false; - } - [[fallthrough]]; - - case 2: - ipu_cmd.pos[0] = 2; - - // IPU0 isn't ready for data, so let's wait for it to be - if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[0] <= 3) - { - return false; - } - - if (decoder.macroblock_modes & DCT_TYPE_INTERLACED) - { - DCT_offset = decoder_stride; - DCT_stride = decoder_stride * 2; - } - else - { - DCT_offset = decoder_stride * 8; - DCT_stride = decoder_stride; - } - - if (decoder.macroblock_modes & MACROBLOCK_INTRA) - { - switch(ipu_cmd.pos[1]) - { - case 0: - decoder.coded_block_pattern = 0x3F; - [[fallthrough]]; - - case 1: - if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1)) - { - ipu_cmd.pos[1] = 1; - return false; - } - [[fallthrough]]; - - case 2: - if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) - { - ipu_cmd.pos[1] = 2; - return false; - } - [[fallthrough]]; - - case 3: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) - { - ipu_cmd.pos[1] = 3; - return false; - } - [[fallthrough]]; - - case 4: - if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) - { - ipu_cmd.pos[1] = 4; - return false; - } - [[fallthrough]]; - - case 5: - if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) - { - ipu_cmd.pos[1] = 5; - return false; - } - [[fallthrough]]; - - case 6: - if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6)) - { - ipu_cmd.pos[1] = 6; - return false; - } - break; - - jNO_DEFAULT; - } - - // Copy macroblock8 to macroblock16 - without sign extension. - // Manually inlined due to MSVC refusing to inline the SSE-optimized version. 
- { - const u8 *s = (const u8*)&mb8; - u16 *d = (u16*)&mb16; - - //Y bias - 16 * 16 - //Cr bias - 8 * 8 - //Cb bias - 8 * 8 - - __m128i zeroreg = _mm_setzero_si128(); - - for (uint i = 0; i < (256+64+64) / 32; ++i) - { - //*d++ = *s++; - __m128i woot1 = _mm_load_si128((__m128i*)s); - __m128i woot2 = _mm_load_si128((__m128i*)s+1); - _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg)); - _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg)); - _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg)); - _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg)); - s += 32; - d += 32; - } - } - } - else - { - if (decoder.macroblock_modes & MACROBLOCK_PATTERN) - { - switch(ipu_cmd.pos[1]) - { - case 0: - decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits - [[fallthrough]]; - - case 1: - if (decoder.coded_block_pattern & 0x20) - { - if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1)) - { - ipu_cmd.pos[1] = 1; - return false; - } - } - [[fallthrough]]; - - case 2: - if (decoder.coded_block_pattern & 0x10) - { - if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2)) - { - ipu_cmd.pos[1] = 2; - return false; - } - } - [[fallthrough]]; - - case 3: - if (decoder.coded_block_pattern & 0x08) - { - if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3)) - { - ipu_cmd.pos[1] = 3; - return false; - } - } - [[fallthrough]]; - - case 4: - if (decoder.coded_block_pattern & 0x04) - { - if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4)) - { - ipu_cmd.pos[1] = 4; - return false; - } - } - [[fallthrough]]; - - case 5: - if (decoder.coded_block_pattern & 0x2) - { - if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5)) - { - ipu_cmd.pos[1] = 5; - return false; - } - } - [[fallthrough]]; - - case 6: - if (decoder.coded_block_pattern & 0x1) - { - if (!slice_non_intra_DCT((s16*)mb16.Cr, 
decoder_stride >> 1, ipu_cmd.pos[1] == 6)) - { - ipu_cmd.pos[1] = 6; - return false; - } - } - break; - - jNO_DEFAULT; - } - } - } - - // Send The MacroBlock via DmaIpuFrom - ipuRegs.ctrl.SCD = 0; - coded_block_pattern = decoder.coded_block_pattern; - - decoder.SetOutputTo(mb16); - [[fallthrough]]; - - case 3: - { - pxAssert(decoder.ipu0_data > 0); - - uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); - decoder.AdvanceIpuDataBy(read); - - if (decoder.ipu0_data != 0) - { - // IPU FIFO filled up -- Will have to finish transferring later. - ipu_cmd.pos[0] = 3; - return false; - } - - mbaCount = 0; - if (read) - { - ipu_cmd.pos[0] = 4; - return false; - } - } - [[fallthrough]]; - - case 4: - { - u8 bit8; - u32 start_check; - if (!getBits8((u8*)&bit8, 0)) - { - ipu_cmd.pos[0] = 4; - return false; - } - - if (bit8 == 0) - { - g_BP.Align(); - do - { - if (!g_BP.FillBuffer(24)) - { - ipu_cmd.pos[0] = 4; - return false; - } - start_check = UBITS(24); - if (start_check != 0) - { - if (start_check == 1) - { - ipuRegs.ctrl.SCD = 1; - } - else - { - ipuRegs.ctrl.ECD = 1; - } - break; - } - DUMPBITS(8); - } while (1); - } - } - [[fallthrough]]; - - case 5: - if (!getBits32((u8*)&ipuRegs.top, 0)) - { - ipu_cmd.pos[0] = 5; - return false; - } - - ipuRegs.top = BigEndian(ipuRegs.top); - break; - } - - return true; -} - -MULTI_ISA_UNSHARED_END diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h deleted file mode 100644 index d5ea1132bd..0000000000 --- a/pcsx2/IPU/mpeg2lib/Mpeg.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Mpeg.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. 
- * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -#include "IPU/IPU.h" - -#include "GS/MultiISA.h" - -#include "common/Assertions.h" - -// the IPU is fixed to 16 byte strides (128-bit / QWC resolution): -static const uint decoder_stride = 16; - -enum macroblock_modes -{ - MACROBLOCK_INTRA = 1, - MACROBLOCK_PATTERN = 2, - MACROBLOCK_MOTION_BACKWARD = 4, - MACROBLOCK_MOTION_FORWARD = 8, - MACROBLOCK_QUANT = 16, - DCT_TYPE_INTERLACED = 32 -}; - -enum motion_type -{ - MOTION_TYPE_SHIFT = 6, - MOTION_TYPE_MASK = (3*64), - MOTION_TYPE_BASE = 64, - MC_FIELD = (1*64), - MC_FRAME = (2*64), - MC_16X8 = (2*64), - MC_DMV = (3*64) -}; - -/* picture structure */ -enum picture_structure -{ - TOP_FIELD = 1, - BOTTOM_FIELD = 2, - FRAME_PICTURE = 3 -}; - -/* picture coding type */ -enum picture_coding_type -{ - I_TYPE = 1, - P_TYPE = 2, - B_TYPE = 3, - D_TYPE = 4 -}; - -struct macroblock_8{ - u8 Y[16][16]; //0 - u8 Cb[8][8]; //1 - u8 Cr[8][8]; //2 -}; - -struct macroblock_16{ - s16 Y[16][16]; //0 - s16 Cb[8][8]; //1 - s16 Cr[8][8]; //2 -}; - -struct macroblock_rgb32{ - struct { - u8 r, g, b, a; - } c[16][16]; -}; - -struct rgb16_t{ - u16 r:5, g:5, b:5, a:1; -}; - -struct macroblock_rgb16{ - rgb16_t c[16][16]; -}; - -struct decoder_t { - /* first, state that carries information from one macroblock 
to the */ - /* next inside a slice, and is never used outside of mpeg2_slice() */ - - /* DCT coefficients - should be kept aligned ! */ - s16 DCTblock[64]; - - u8 niq[64]; //non-intraquant matrix (sequence header) - u8 iq[64]; //intraquant matrix (sequence header) - - macroblock_8 mb8; - macroblock_16 mb16; - macroblock_rgb32 rgb32; - macroblock_rgb16 rgb16; - - uint ipu0_data; // amount of data in the output macroblock (in QWC) - uint ipu0_idx; - - int quantizer_scale; - - /* now non-slice-specific information */ - - /* picture header stuff */ - - /* what type of picture this is (I, P, B, D) */ - int coding_type; - - /* picture coding extension stuff */ - - /* predictor for DC coefficients in intra blocks */ - s16 dc_dct_pred[3]; - - /* quantization factor for intra dc coefficients */ - int intra_dc_precision; - /* top/bottom/both fields */ - int picture_structure; - /* bool to indicate all predictions are frame based */ - int frame_pred_frame_dct; - /* bool to indicate whether intra blocks have motion vectors */ - /* (for concealment) */ - int concealment_motion_vectors; - /* bit to indicate which quantization table to use */ - int q_scale_type; - /* bool to use different vlc tables */ - int intra_vlc_format; - /* used for DMV MC */ - int top_field_first; - // Pseudo Sign Offset - int sgn; - // Dither Enable - int dte; - // Output Format - int ofm; - // Macroblock type - int macroblock_modes; - // DC Reset - int dcr; - // Coded block pattern - int coded_block_pattern; - - /* stuff derived from bitstream */ - - /* the zigzag scan we're supposed to be using, true for alt, false for normal */ - bool scantype; - - int mpeg1; - - template< typename T > - void SetOutputTo( T& obj ) - { - uint mb_offset = ((uptr)&obj - (uptr)&mb8); - pxAssume( (mb_offset & 15) == 0 ); - ipu0_idx = mb_offset / 16; - ipu0_data = sizeof(obj)/16; - } - - u128* GetIpuDataPtr() - { - return ((u128*)&mb8) + ipu0_idx; - } - - void AdvanceIpuDataBy(uint amt) - { - pxAssertMsg(ipu0_data>=amt, 
"IPU FIFO Overflow on advance!" ); - ipu0_idx += amt; - ipu0_data -= amt; - } -}; - -struct mpeg2_scan_pack -{ - u8 norm[64]; - u8 alt[64]; -}; - -extern u32 UBITS(uint bits); -extern s32 SBITS(uint bits); - -MULTI_ISA_DEF( - extern int bitstream_init(); - - extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride); - extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride); - - extern bool mpeg2sliceIDEC(); - extern bool mpeg2_slice(); - extern int get_macroblock_address_increment(); - extern int get_macroblock_modes(); - - extern int get_motion_delta(const int f_code); - extern int get_dmv(); - - extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn); - extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte); - extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4); - - extern int slice (u8 * buffer); -) - -#ifdef _MSC_VER -#define BigEndian(in) _byteswap_ulong(in) -#else -#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap... -#endif - -#ifdef _MSC_VER -#define BigEndian64(in) _byteswap_uint64(in) -#else -#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap... -#endif - -alignas(16) extern const mpeg2_scan_pack mpeg2_scan; -extern const int non_linear_quantizer_scale[]; - -// The IPU can only do one task at once and never uses other buffers so all mpeg state variables -// are made available to mpeg/vlc modules as globals here: - -alignas(16) extern tIPU_BP g_BP; -alignas(16) extern decoder_t decoder; - diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h deleted file mode 100644 index 61bcd791b5..0000000000 --- a/pcsx2/IPU/mpeg2lib/Vlc.h +++ /dev/null @@ -1,663 +0,0 @@ -/* - * vlc.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * Modified by Florin for PCSX2 emu - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
- * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// WARNING! This file should only be included into Mpeg.cpp AND NOWHERE ELSE. -// All contents of this file are used only by Mpeg.cpp, and including it elsewhere will -// just result in the linker having to remove a whole lot of redundant/unused decoder -// tables and static functions. -- air - -#pragma once - -static __fi int GETWORD() -{ - return g_BP.FillBuffer(16); -} - -// Removes bits from the bitstream. This is done independently of UBITS/SBITS because a -// lot of mpeg streams have to read ahead and rewind bits and re-read them at different -// bit depths or sign'age. 
-static __fi void DUMPBITS(uint num) -{ - g_BP.Advance(num); - //pxAssume(g_BP.FP != 0); -} - -static __fi u32 GETBITS(uint num) -{ - uint retVal = UBITS(num); - g_BP.Advance(num); - - return retVal; -} - -struct MBtab { - u8 modes; - u8 len; -}; - -struct MVtab { - u8 delta; - u8 len; -}; - -struct DMVtab { - s8 dmv; - u8 len; -}; - -struct CBPtab { - u8 cbp; - u8 len; -}; - -struct DCtab { - u8 size; - u8 len; -}; - -struct DCTtab { - u8 run; - u8 level; - u8 len; -}; - -struct MBAtab { - u8 mba; - u8 len; -}; - - -#define INTRA MACROBLOCK_INTRA -#define QUANT MACROBLOCK_QUANT - -static const MBtab MB_I [] = { - {INTRA|QUANT, 2}, {INTRA, 1} -}; - -#define MC MACROBLOCK_MOTION_FORWARD -#define CODED MACROBLOCK_PATTERN - -alignas(16) static const MBtab MB_P [] = { - {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, - {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} -}; - -#define FWD MACROBLOCK_MOTION_FORWARD -#define BWD MACROBLOCK_MOTION_BACKWARD -#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD - -alignas(16) static const MBtab MB_B [] = { - {0, 0}, {INTRA|QUANT, 6}, - {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, - {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, - {INTRA, 5}, {INTRA, 5}, - {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, - {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 
2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} -}; - -#undef INTRA -#undef QUANT -#undef MC -#undef CODED -#undef FWD -#undef BWD -#undef INTER - - -static const MVtab MV_4 [] = { - { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} -}; - -alignas(16) static const MVtab MV_10 [] = { - { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, - { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, - {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, - { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, - { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, - { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} -}; - - -static const DMVtab DMV_2 [] = { - { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} -}; - - -alignas(16) static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, - {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 
4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} -}; - -alignas(16) static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} -}; - -#if 0 // following tables are unused by PCSX2 - -static const DCtab DC_lum_5 [] = { - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} -}; - -static const DCtab DC_chrom_5 [] = { - {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} -}; - -static const DCtab DC_long [] = { - {6, 5}, 
{6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} -}; - -static const DCTtab DCT_16 [] = { - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, - { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, - { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, - { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} -}; - -static const DCTtab DCT_15 [] = { - { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, - { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, - { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, - { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, - { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, - { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, - { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, - { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, - { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, - { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, - { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, - { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} -}; - -static const DCTtab DCT_13 [] = { - { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, - { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, - { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, - { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, - { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, - { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, - { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, - { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, - { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, - { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, - { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, - { 7, 2,12}, { 7, 2,12}, 
{ 18, 1,12}, { 18, 1,12} -}; - -static const DCTtab DCT_B14_10 [] = { - { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, - { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} -}; - -static const DCTtab DCT_B14_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, - { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, - { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, - { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, - { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} -}; - -static const DCTtab DCT_B14AC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} -}; - -static const DCTtab DCT_B14DC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} -}; - -static const DCTtab DCT_B15_10 [] = { - { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, - { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} -}; - -static const DCTtab DCT_B15_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, - { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, - { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, - { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 
2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, - { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 
2}, { 1, 1, 2}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, - { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, - { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, - { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} -}; -#endif - -struct MBAtabSet -{ - MBAtab mba5[30]; - MBAtab mba11[26*4]; -}; -alignas(16) static const MBAtabSet MBA = { - { // mba5 - {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, - {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} - }, - - { // mba11 - {32, 11}, {31, 11}, {30, 11}, {29, 11}, - {28, 11}, {27, 11}, {26, 11}, {25, 11}, - {24, 11}, {23, 11}, {22, 11}, {21, 11}, - {20, 10}, {20, 10}, {19, 10}, {19, 10}, - {18, 10}, {18, 10}, {17, 10}, {17, 10}, - {16, 10}, {16, 10}, {15, 10}, {15, 10}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 
8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} - } -}; - -// New - -#if 0 // Not used by PCSX2 -/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */ -static MBAtab MBAtab1[16] = -{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4}, - {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3} -}; - -/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx */ -static MBAtab MBAtab2[104] = -{ - {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11}, - {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10}, - {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10}, - {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, {15,8}, - {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, {14,8}, - {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, {13,8}, - {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, {12,8}, - {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, {11,8}, - {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, {10,8}, - {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, - {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, {9,7}, - {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, - {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7}, {8,7} -}; -#endif - -struct DCtabSet -{ - DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 - DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 - DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 - DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 -}; - -alignas(16) static const DCtabSet DCtable = -{ - // lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 
11110 */ - { {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} }, - - /* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */ - { {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} }, - - /* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */ - { {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} }, - - /* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */ - { {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, - {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, - {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, - {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} }, -}; - -struct DCTtabSet -{ - DCTtab first[12]; - DCTtab next[12]; - - DCTtab tab0[60]; - DCTtab tab0a[252]; - DCTtab tab1[8]; - DCTtab tab1a[8]; - - DCTtab tab2[16]; - DCTtab tab3[16]; - DCTtab tab4[16]; - DCTtab tab5[16]; - DCTtab tab6[16]; -}; - -alignas(16) static const DCTtabSet DCT = -{ - /* first[12]: Table B-14, DCT coefficients table zero, - * codes 0100 ... 1xxx (used for first (DC) coefficient) - */ - { {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3}, - {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1}, - {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} }, - - /* next[12]: Table B-14, DCT coefficients table zero, - * codes 0100 ... 
1xxx (used for all other coefficients) - */ - { {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3}, - {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */ - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2} }, - - /* tab0[60]: Table B-14, DCT coefficients table zero, - * codes 000001xx ... 00111xxx - */ - { {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */ - {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7}, - {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7}, - {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6}, - {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6}, - {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6}, - {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6}, - {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8}, - {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8}, - {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5}, - {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5}, - {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, - {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} }, - - /* tab0a[252]: Table B-15, DCT coefficients table one, - * codes 000001xx ... 11111111 - */ - { {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */ - {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7}, - {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7}, - {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6}, - {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6}, - {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6}, - {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6}, - {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8}, - {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8}, - {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5}, - {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5}, - {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5}, - {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, - {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3}, - {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */ - {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, - {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, - 
{64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3}, - {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5}, - {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5}, - {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5}, - {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5}, - {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7}, - {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7}, - {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8}, - {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} }, - - /* Table B-14, DCT coefficients table zero, - * codes 0000001000 ... 0000001111 - */ - { {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10}, - {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10} }, - - /* Table B-15, DCT coefficients table one, - * codes 000000100x ... 000000111x - */ - { {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9}, - {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 000000010000 ... 
000000011111 - */ - { {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12}, - {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12}, - {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12}, - {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 0000000010000 ... 0000000011111 - */ - { {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13}, - {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13}, - {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13}, - {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 00000000010000 ... 00000000011111 - */ - { {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14}, - {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14}, - {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14}, - {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 000000000010000 ... 000000000011111 - */ - { {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15}, - {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15}, - {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15}, - {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} }, - - /* Table B-14/15, DCT coefficients table zero / one, - * codes 0000000000010000 ... 
0000000000011111 - */ - { {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16}, - {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16}, - {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16}, - {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} } - -}; diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 51db3182e5..c0ef19b3d2 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -20,9 +20,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "IPU.h" -#include "yuv2rgb.h" -#include "mpeg2lib/Mpeg.h" +#include "IPU/IPU.h" +#include "IPU/IPU_MultiISA.h" +#include "IPU/yuv2rgb.h" // The IPU's colour space conversion conforms to ITU-R Recommendation BT.601 if anyone wants to make a // faster or "more accurate" implementation, but this is the precise documented integer method used by diff --git a/pcsx2/pcsx2core.vcxproj b/pcsx2/pcsx2core.vcxproj index 46018909b2..ed26318a7d 100644 --- a/pcsx2/pcsx2core.vcxproj +++ b/pcsx2/pcsx2core.vcxproj @@ -450,8 +450,6 @@ - - @@ -579,6 +577,7 @@ + @@ -782,8 +781,6 @@ - - diff --git a/pcsx2/pcsx2core.vcxproj.filters b/pcsx2/pcsx2core.vcxproj.filters index a29bd0c053..4bea3e0524 100644 --- a/pcsx2/pcsx2core.vcxproj.filters +++ b/pcsx2/pcsx2core.vcxproj.filters @@ -94,9 +94,6 @@ {4dab2d06-69e0-4f3e-b6d3-45e5e85af940} - - {67e51016-d1db-44d2-910d-349d2833f798} - {5602cc18-9d1c-49c8-9509-7e4cf9ecd91b} @@ -653,12 +650,6 @@ System\Ps2\IPU - - System\Ps2\IPU\mpeg2lib - - - System\Ps2\IPU\mpeg2lib - System\Ps2\GS\GIF @@ -1619,12 +1610,6 @@ System\Ps2\IPU - - System\Ps2\IPU\mpeg2lib - - - System\Ps2\IPU\mpeg2lib - System\Ps2\Debug @@ -2345,6 +2330,9 @@ System\Ps2\GS\Renderers\Hardware + + System\Ps2\IPU +