diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
index 7d9ccc121a..140e036c50 100644
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@@ -687,8 +687,6 @@ set(pcsx2IPUSources
 set(pcsx2IPUSourcesUnshared
 	IPU/IPU_MultiISA.cpp
 	IPU/IPUdither.cpp
-	IPU/mpeg2lib/Idct.cpp
-	IPU/mpeg2lib/Mpeg.cpp
 	IPU/yuv2rgb.cpp
 )
 
@@ -698,8 +696,7 @@ set(pcsx2IPUHeaders
 	IPU/IPU_Fifo.h
 	IPU/IPU_MultiISA.h
 	IPU/IPUdma.h
-	IPU/mpeg2lib/Mpeg.h
-	IPU/mpeg2lib/Vlc.h
+	IPU/mpeg2_vlc.h
 	IPU/yuv2rgb.h
 )
 
diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp
index 3aa070fcd7..b0e366c74a 100644
--- a/pcsx2/IPU/IPU.cpp
+++ b/pcsx2/IPU/IPU.cpp
@@ -19,7 +19,6 @@
 #include "IPU.h"
 #include "IPU_MultiISA.h"
 #include "IPUdma.h"
-#include "mpeg2lib/Mpeg.h"
 
 #include <limits.h>
 #include "Config.h"
@@ -46,10 +45,47 @@ int coded_block_pattern = 0;
 
 alignas(16) u8 g_ipu_indx4[16*16/2];
 
+alignas(16) const int non_linear_quantizer_scale[32] = // quantizer scale looked up by the 5-bit quantizer_scale_code when decoder.q_scale_type is set
+{
+	0,  1,  2,  3,  4,  5,	6,	7,
+	8, 10, 12, 14, 16, 18,  20,  22,
+	24, 28, 32, 36, 40, 44,  48,  52,
+	56, 64, 72, 80, 88, 96, 104, 112
+};
+
 uint eecount_on_last_vdec = 0;
 bool FMVstarted = false;
 bool EnableFMV = false;
 
+// Duplicate of the static getBits32() in IPU_MultiISA.cpp, repeated here because IPU.cpp is not unshared.
+// Reads 32 bits at the current bit position into *address. When that position is not byte-aligned,
+// each output byte takes its high bits from the current byte and its low bits from the next byte.
+__ri static u8 getBits32(u8* address, bool advance)
+{
+	if (!g_BP.FillBuffer(32))
+		return 0; // FillBuffer reported failure; *address is left untouched
+
+	const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP / 8];
+
+	if (uint shift = (g_BP.BP & 7))
+	{
+		u32 mask = (0xff >> shift); // low (8 - shift) bits of one byte
+		mask = mask | (mask << 8) | (mask << 16) | (mask << 24); // same pattern in all four bytes
+
+		*(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift);
+	}
+	else
+	{
+		// Bit position-aligned -- no masking/shifting necessary
+		*(u32*)address = *(u32*)readpos;
+	}
+
+	if (advance)
+		g_BP.Advance(32); // consume the bits; otherwise this call is only a peek
+
+	return 1;
+}
+
 void tIPU_cmd::clear()
 {
 	memzero_sse_a(*this);
@@ -89,7 +125,6 @@ void ReportIPU()
 	Console.WriteLn("thresh = 0x%x.", g_ipu_thresh);
 	Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern);
 	Console.WriteLn("g_decoder = 0x%x.", &decoder);
-	Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan);
 	Console.WriteLn(ipu_cmd.desc());
 	Console.Newline();
 }
@@ -346,7 +381,6 @@ __fi bool ipuWrite64(u32 mem, u64 value)
 	return true;
 }
 
-
 //////////////////////////////////////////////////////
 // IPU Commands (exec on worker thread only)
 
@@ -416,130 +450,6 @@ static void ipuSETTH(u32 val)
 	IPU_LOG("SETTH (Set threshold value)command %x.", val&0x1ff01ff);
 }
 
-// --------------------------------------------------------------------------------------
-//  Buffer reader
-// --------------------------------------------------------------------------------------
-
-__ri u32 UBITS(uint bits)
-{
-	uint readpos8 = g_BP.BP/8;
-
-	uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 ));
-	uint bp7 = (g_BP.BP & 7);
-	result <<= bp7;
-	result >>= (32 - bits);
-
-	return result;
-}
-
-__ri s32 SBITS(uint bits)
-{
-	// Read an unaligned 32 bit value and then shift the bits up and then back down.
-
-	uint readpos8 = g_BP.BP/8;
-
-	int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 ));
-	uint bp7 = (g_BP.BP & 7);
-	result <<= bp7;
-	result >>= (32 - bits);
-
-	return result;
-}
-
-// whenever reading fractions of bytes. The low bits always come from the next byte
-// while the high bits come from the current byte
-u8 getBits64(u8 *address, bool advance)
-{
-	if (!g_BP.FillBuffer(64)) return 0;
-
-	const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
-
-	if (uint shift = (g_BP.BP & 7))
-	{
-		u64 mask = (0xff >> shift);
-		mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56);
-
-		*(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift);
-	}
-	else
-	{
-		*(u64*)address = *(u64*)readpos;
-	}
-
-	if (advance) g_BP.Advance(64);
-
-	return 1;
-}
-
-// whenever reading fractions of bytes. The low bits always come from the next byte
-// while the high bits come from the current byte
-__fi u8 getBits32(u8 *address, bool advance)
-{
-	if (!g_BP.FillBuffer(32)) return 0;
-
-	const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8];
-
-	if(uint shift = (g_BP.BP & 7))
-	{
-		u32 mask = (0xff >> shift);
-		mask = mask | (mask << 8) | (mask << 16) | (mask << 24);
-
-		*(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift);
-	}
-	else
-	{
-		// Bit position-aligned -- no masking/shifting necessary
-		*(u32*)address = *(u32*)readpos;
-	}
-
-	if (advance) g_BP.Advance(32);
-
-	return 1;
-}
-
-__fi u8 getBits16(u8 *address, bool advance)
-{
-	if (!g_BP.FillBuffer(16)) return 0;
-
-	const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
-
-	if (uint shift = (g_BP.BP & 7))
-	{
-		uint mask = (0xff >> shift);
-		mask = mask | (mask << 8);
-		*(u16*)address = ((~mask & *(u16*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u16*)readpos) << shift);
-	}
-	else
-	{
-		*(u16*)address = *(u16*)readpos;
-	}
-
-	if (advance) g_BP.Advance(16);
-
-	return 1;
-}
-
-u8 getBits8(u8 *address, bool advance)
-{
-	if (!g_BP.FillBuffer(8)) return 0;
-
-	const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
-
-	if (uint shift = (g_BP.BP & 7))
-	{
-		uint mask = (0xff >> shift);
-		*(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift);
-	}
-	else
-	{
-		*(u8*)address = *(u8*)readpos;
-	}
-
-	if (advance) g_BP.Advance(8);
-
-	return 1;
-}
-
 // --------------------------------------------------------------------------------------
 //  IPU Worker / Dispatcher
 // --------------------------------------------------------------------------------------
diff --git a/pcsx2/IPU/IPU.h b/pcsx2/IPU/IPU.h
index fcfd2ebe78..4dc8cc03ea 100644
--- a/pcsx2/IPU/IPU.h
+++ b/pcsx2/IPU/IPU.h
@@ -293,7 +293,6 @@ extern bool EnableFMV;
 
 alignas(16) extern tIPU_cmd ipu_cmd;
 extern uint eecount_on_last_vdec;
-extern int coded_block_pattern;
 extern bool CommandExecuteQueued;
 
 extern void ipuReset();
@@ -307,8 +306,3 @@ extern void IPUCMD_WRITE(u32 val);
 extern void ipuSoftReset();
 extern void IPUProcessInterrupt();
 
-extern u8 getBits64(u8 *address, bool advance);
-extern u8 getBits32(u8 *address, bool advance);
-extern u8 getBits16(u8 *address, bool advance);
-extern u8 getBits8(u8 *address, bool advance);
-
diff --git a/pcsx2/IPU/IPU_Fifo.cpp b/pcsx2/IPU/IPU_Fifo.cpp
index 9765b97b96..3df82a3a27 100644
--- a/pcsx2/IPU/IPU_Fifo.cpp
+++ b/pcsx2/IPU/IPU_Fifo.cpp
@@ -15,9 +15,9 @@
 
 #include "PrecompiledHeader.h"
 #include "Common.h"
-#include "IPU.h"
+#include "IPU/IPU.h"
 #include "IPU/IPUdma.h"
-#include "mpeg2lib/Mpeg.h"
+#include "IPU/IPU_MultiISA.h"
 
 alignas(16) IPU_Fifo ipu_fifo;
 
diff --git a/pcsx2/IPU/IPU_MultiISA.cpp b/pcsx2/IPU/IPU_MultiISA.cpp
index 7c94d975e2..1c8d323649 100644
--- a/pcsx2/IPU/IPU_MultiISA.cpp
+++ b/pcsx2/IPU/IPU_MultiISA.cpp
@@ -11,20 +11,1575 @@
  *
  *  You should have received a copy of the GNU General Public License along with PCSX2.
  *  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  Some of the functions in this file are based on the mpeg2dec library,
+ * 
+ *  Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ *  Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *  Modified by Florin for PCSX2 emu
+ *
+ *  under the GPL license. However, they have been heavily rewritten for PCSX2 usage.
+ *  The original author's copyright statement is included above for completeness sake.
  */
 
-#include "IPU_MultiISA.h"
+#include "IPU/IPU.h"
+#include "IPU/IPUdma.h"
+#include "IPU/yuv2rgb.h"
+#include "IPU/IPU_MultiISA.h"
+#include "common/MemsetFast.inl"
 
-#include "IPU.h"
-#include "IPUdma.h"
-#include "yuv2rgb.h"
+// the IPU is fixed to 16 byte strides (128-bit / QWC resolution):
+static const uint decoder_stride = 16;
+
+#if MULTI_ISA_COMPILE_ONCE
+
+static constexpr std::array<u8, 1024> make_clip_lut() // builds the clamp-to-[0,255] table for inputs -384..639
+{
+	std::array<u8, 1024> lut = {};
+	for (int i = -384; i < 640; i++)
+		lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); // entry (i + 384) holds i clamped to 0..255
+	return lut;
+}
+
+static constexpr mpeg2_scan_pack make_scan_pack() // builds both coefficient scan tables with each index bit-permuted (see loop)
+{
+	constexpr u8 mpeg2_scan_norm[64] = {
+		/* Zig-Zag scan pattern */
+		0,  1,  8,  16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+		12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+		35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+		58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+	};
+
+	constexpr u8 mpeg2_scan_alt[64] = {
+		/* Alternate scan pattern */
+		0,  8,  16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
+		41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
+		51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
+		53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
+	};
+
+	mpeg2_scan_pack pack = {};
+
+	for (int i = 0; i < 64; i++) {
+		int j = mpeg2_scan_norm[i];
+		pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); // bits 1,2,4,5 of the index move down one place; bits 0 and 3 move up two
+		j = mpeg2_scan_alt[i];
+		pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); // same permutation for the alternate scan
+	}
+
+	return pack;
+}
+
+alignas(16) const std::array<u8, 1024> g_idct_clip_lut = make_clip_lut(); // clamp table; index with (value + 384)
+alignas(16) const mpeg2_scan_pack mpeg2_scan = make_scan_pack(); // permuted zig-zag (norm) and alternate (alt) scan orders
+
+#endif
 
 MULTI_ISA_UNSHARED_START
 
+static void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
+static void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
+
+// --------------------------------------------------------------------------------------
+//  Buffer reader
+// --------------------------------------------------------------------------------------
+
+__ri static u32 UBITS(uint bits) // peek: returns the next `bits` bits as an unsigned value; does not advance g_BP
+{
+	uint readpos8 = g_BP.BP/8;
+
+	uint result = BigEndian(*(u32*)( (u8*)g_BP.internal_qwc + readpos8 )); // unaligned 4-byte load; BigEndian presumably puts the first stream byte in the top bits -- TODO confirm
+	uint bp7 = (g_BP.BP & 7);
+	result <<= bp7; // drop the bits of the first byte that lie before the bit position
+	result >>= (32 - bits); // keep the top `bits` bits, zero-extended; bits == 0 would shift by 32, so pass bits >= 1
+
+	return result;
+}
+
+__ri static s32 SBITS(uint bits) // peek: returns the next `bits` bits as a signed value; does not advance g_BP
+{
+	// Read an unaligned 32 bit value and then shift the bits up and then back down.
+
+	uint readpos8 = g_BP.BP/8;
+
+	int result = BigEndian(*(s32*)( (s8*)g_BP.internal_qwc + readpos8 ));
+	uint bp7 = (g_BP.BP & 7);
+	result <<= bp7; // move the first wanted bit into the sign position
+	result >>= (32 - bits); // signed right shift, relied on here to replicate that sign bit
+
+	return result;
+}
+
+__fi static int GETWORD() // returns the result of g_BP.FillBuffer(16); callers treat 0 as "input not available yet"
+{
+	return g_BP.FillBuffer(16);
+}
+
+// Consumes `num` bits from the bitstream.  This is kept separate from UBITS/SBITS because
+// MPEG decoding often has to read ahead and then re-read the same bits at a different
+// bit width or signedness before deciding how many to consume.
+__fi static void DUMPBITS(uint num)
+{
+	g_BP.Advance(num);
+	//pxAssume(g_BP.FP != 0);
+}
+
+__fi static u32 GETBITS(uint num) // reads `num` bits as an unsigned value and consumes them
+{
+	uint retVal = UBITS(num); // peek first...
+	g_BP.Advance(num); // ...then step past the bits just read
+
+	return retVal;
+}
+
+// Reads 64 bits at the current bit position into *address; returns 0 if FillBuffer(64) fails, otherwise 1.
+// When not byte-aligned, each output byte takes its high bits from the current byte and its low bits from the next byte.
+__ri static u8 getBits64(u8 *address, bool advance)
+{
+	if (!g_BP.FillBuffer(64)) return 0;
+
+	const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
+
+	if (uint shift = (g_BP.BP & 7))
+	{
+		u64 mask = (0xff >> shift); // low (8 - shift) bits of one byte
+		mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56); // same pattern in all eight bytes
+
+		*(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift);
+	}
+	else
+	{
+		*(u64*)address = *(u64*)readpos; // byte-aligned: plain copy
+	}
+
+	if (advance) g_BP.Advance(64); // consume the bits; otherwise this call is only a peek
+
+	return 1;
+}
+
+// Reads 32 bits at the current bit position into *address; returns 0 if FillBuffer(32) fails, otherwise 1.
+// When not byte-aligned, each output byte takes its high bits from the current byte and its low bits from the next byte.
+__ri static u8 getBits32(u8 *address, bool advance)
+{
+	if (!g_BP.FillBuffer(32)) return 0;
+
+	const u8* readpos = &g_BP.internal_qwc->_u8[g_BP.BP/8];
+
+	if(uint shift = (g_BP.BP & 7))
+	{
+		u32 mask = (0xff >> shift); // low (8 - shift) bits of one byte
+		mask = mask | (mask << 8) | (mask << 16) | (mask << 24); // same pattern in all four bytes
+
+		*(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift);
+	}
+	else
+	{
+		// Bit position-aligned -- no masking/shifting necessary
+		*(u32*)address = *(u32*)readpos;
+	}
+
+	if (advance) g_BP.Advance(32); // consume the bits; otherwise this call is only a peek
+
+	return 1;
+}
+
+__ri static u8 getBits8(u8 *address, bool advance) // reads 8 bits at the current bit position into *address; returns 0 if FillBuffer(8) fails, otherwise 1
+{
+	if (!g_BP.FillBuffer(8)) return 0;
+
+	const u8* readpos = &g_BP.internal_qwc[0]._u8[g_BP.BP/8];
+
+	if (uint shift = (g_BP.BP & 7))
+	{
+		uint mask = (0xff >> shift); // low (8 - shift) bits of the current byte
+		*(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift); // high bits from the current byte, low bits from the next
+	}
+	else
+	{
+		*(u8*)address = *(u8*)readpos; // byte-aligned: plain copy
+	}
+
+	if (advance) g_BP.Advance(8); // consume the bits; otherwise this call is only a peek
+
+	return 1;
+}
+
+
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
+
+/*
+ * In legal streams, the IDCT output should be between -384 and +384.
+ * In corrupted streams, it is possible to force the IDCT output to go
+ * to +-3826 - this is the worst case for a column IDCT where the
+ * column inputs are 16-bit values.
+ */
+
+__fi static void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1) // outputs t0 = w0*d0 + w1*d1 and t1 = w0*d1 - w1*d0
+{
+	int tmp = w0 * (d0 + d1); // shared product, so three multiplies are used instead of four
+	t0 = tmp + (w1 - w0) * d1;
+	t1 = tmp - (w1 + w0) * d0;
+}
+
+__ri static void IDCT_Block(s16* block) // in-place inverse DCT of a 64-entry block: a pass over the 8 rows, then a pass over the 8 columns
+{
+	for (int i = 0; i < 8; i++) // row pass
+	{
+		s16* const rblock = block + 8 * i;
+		if (!(rblock[1] | ((s32*)rblock)[1] | ((s32*)rblock)[2] |
+				((s32*)rblock)[3]))
+		{
+			u32 tmp = (u16)(rblock[0] << 3); // elements 1..7 are all zero: every output of this row is rblock[0] << 3
+			tmp |= tmp << 16;
+			((s32*)rblock)[0] = tmp;
+			((s32*)rblock)[1] = tmp;
+			((s32*)rblock)[2] = tmp;
+			((s32*)rblock)[3] = tmp;
+			continue;
+		}
+
+		int a0, a1, a2, a3;
+		{
+			const int d0 = (rblock[0] << 11) + 128; // 128 = half of 1 << 8, rounding for the final >> 8
+			const int d1 = rblock[1];
+			const int d2 = rblock[2] << 11;
+			const int d3 = rblock[3];
+			int t0 = d0 + d2;
+			int t1 = d0 - d2;
+			int t2, t3;
+			BUTTERFLY(t2, t3, W6, W2, d3, d1);
+			a0 = t0 + t2;
+			a1 = t1 + t3;
+			a2 = t1 - t3;
+			a3 = t0 - t2;
+		}
+
+		int b0, b1, b2, b3;
+		{
+			const int d0 = rblock[4];
+			const int d1 = rblock[5];
+			const int d2 = rblock[6];
+			const int d3 = rblock[7];
+			int t0, t1, t2, t3;
+			BUTTERFLY(t0, t1, W7, W1, d3, d0);
+			BUTTERFLY(t2, t3, W3, W5, d1, d2);
+			b0 = t0 + t2;
+			b3 = t1 + t3;
+			t0 -= t2;
+			t1 -= t3;
+			b1 = ((t0 + t1) * 181) >> 8;
+			b2 = ((t0 - t1) * 181) >> 8;
+		}
+
+		rblock[0] = (a0 + b0) >> 8;
+		rblock[1] = (a1 + b1) >> 8;
+		rblock[2] = (a2 + b2) >> 8;
+		rblock[3] = (a3 + b3) >> 8;
+		rblock[4] = (a3 - b3) >> 8;
+		rblock[5] = (a2 - b2) >> 8;
+		rblock[6] = (a1 - b1) >> 8;
+		rblock[7] = (a0 - b0) >> 8;
+	}
+
+	for (int i = 0; i < 8; i++) // column pass (elements are 8 entries apart)
+	{
+		s16* const cblock = block + i;
+
+		int a0, a1, a2, a3;
+		{
+			const int d0 = (cblock[8 * 0] << 11) + 65536; // 65536 = half of 1 << 17, rounding for the final >> 17
+			const int d1 = cblock[8 * 1];
+			const int d2 = cblock[8 * 2] << 11;
+			const int d3 = cblock[8 * 3];
+			const int t0 = d0 + d2;
+			const int t1 = d0 - d2;
+			int t2;
+			int t3;
+			BUTTERFLY(t2, t3, W6, W2, d3, d1);
+			a0 = t0 + t2;
+			a1 = t1 + t3;
+			a2 = t1 - t3;
+			a3 = t0 - t2;
+		}
+
+		int b0, b1, b2, b3;
+		{
+			const int d0 = cblock[8 * 4];
+			const int d1 = cblock[8 * 5];
+			const int d2 = cblock[8 * 6];
+			const int d3 = cblock[8 * 7];
+			int t0, t1, t2, t3;
+			BUTTERFLY(t0, t1, W7, W1, d3, d0);
+			BUTTERFLY(t2, t3, W3, W5, d1, d2);
+			b0 = t0 + t2;
+			b3 = t1 + t3;
+			t0 = (t0 - t2) >> 8; // unlike the row pass, the >> 8 is applied before the multiply by 181
+			t1 = (t1 - t3) >> 8;
+			b1 = (t0 + t1) * 181;
+			b2 = (t0 - t1) * 181;
+		}
+
+		cblock[8 * 0] = (a0 + b0) >> 17;
+		cblock[8 * 1] = (a1 + b1) >> 17;
+		cblock[8 * 2] = (a2 + b2) >> 17;
+		cblock[8 * 3] = (a3 + b3) >> 17;
+		cblock[8 * 4] = (a3 - b3) >> 17;
+		cblock[8 * 5] = (a2 - b2) >> 17;
+		cblock[8 * 6] = (a1 - b1) >> 17;
+		cblock[8 * 7] = (a0 - b0) >> 17;
+	}
+}
+
+__ri static void IDCT_Copy(s16* block, u8* dest, const int stride) // runs IDCT_Block, writes the result clamped to 0..255 into dest (stride in bytes) and zeroes block
+{
+	IDCT_Block(block);
+
+	for (int i = 0; i < 8; i++) // one row of 8 samples per iteration
+	{
+		dest[0] = (g_idct_clip_lut.data() + 384)[block[0]]; // the +384 bias lets negative sample values index the table
+		dest[1] = (g_idct_clip_lut.data() + 384)[block[1]];
+		dest[2] = (g_idct_clip_lut.data() + 384)[block[2]];
+		dest[3] = (g_idct_clip_lut.data() + 384)[block[3]];
+		dest[4] = (g_idct_clip_lut.data() + 384)[block[4]];
+		dest[5] = (g_idct_clip_lut.data() + 384)[block[5]];
+		dest[6] = (g_idct_clip_lut.data() + 384)[block[6]];
+		dest[7] = (g_idct_clip_lut.data() + 384)[block[7]];
+
+		std::memset(block, 0, 16); // clear the row just written (8 x s16 = 16 bytes)
+
+		dest += stride;
+		block += 8;
+	}
+}
+
+
+// Stores the IDCT of block into dest (it overwrites, despite the name); stride = dest increment in 16-bit units (typically 8 [128 bits] or 16 [256 bits]).
+__ri static void IDCT_Add(const int last, s16* block, s16* dest, const int stride)
+{
+	// on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0).
+
+	if (last != 129 || (block[0] & 7) == 4) // general path: full IDCT
+	{
+		IDCT_Block(block);
+
+		__m128 zero = _mm_setzero_ps();
+		for (int i = 0; i < 8; i++)
+		{
+			_mm_store_ps((float*)dest, _mm_load_ps((float*)block)); // copy one row of 8 s16 (128 bits)
+			_mm_store_ps((float*)block, zero); // and clear it in the source block
+
+			dest += stride;
+			block += 8;
+		}
+	}
+	else // shortcut path: every output sample gets the same rounded value derived from block[0]
+	{
+		s16 DC = ((int)block[0] + 4) >> 3; // (block[0] + 4) / 8, i.e. rounded
+		s16 dcf[2] = {DC, DC};
+		block[0] = block[63] = 0; // only these two entries are cleared on this path
+
+		__m128 dc128 = _mm_set_ps1(*(float*)dcf); // the DC pair replicated four times = 8 x s16
+
+		for (int i = 0; i < 8; ++i)
+			_mm_store_ps((float*)(dest + (stride * i)), dc128);
+	}
+}
+
+/* Bitstream and buffer need to be reallocated in order to successfully
+	read the old data. Here the old data stored in the 2nd slot
+	of the internal buffer is copied to the 1st slot, and the new data read
+	into the 1st slot is copied to the 2nd slot, which will later be copied
+	back to the 1st slot when 128 bits have been read.
+*/
+static const DCTtab * tab; // DCT VLC entry being decoded; file-static so get_intra_block/get_non_intra_block can resume at their `case 1` after returning false
+static int mbaCount = 0; // macroblock address increment accumulated by mpeg2sliceIDEC
+
+__ri static int BitstreamInit () // returns the result of g_BP.FillBuffer(32); mpeg2sliceIDEC waits (returns false) while this is 0
+{
+	return g_BP.FillBuffer(32);
+}
+
+static int GetMacroblockModes() // decodes the macroblock modes for decoder.coding_type; returns the mode flags (B/D types also carry a length in bits 16+), or 0 on error
+{
+	int macroblock_modes;
+	const MBtab * tab; // local; shadows the file-static DCTtab pointer of the same name
+
+	switch (decoder.coding_type)
+	{
+		case I_TYPE:
+			macroblock_modes = UBITS(2);
+
+			if (macroblock_modes == 0) return 0;   // error
+
+			tab = MB_I + (macroblock_modes >> 1);
+			DUMPBITS(tab->len);
+			macroblock_modes = tab->modes;
+
+			if ((!(decoder.frame_pred_frame_dct)) &&
+				(decoder.picture_structure == FRAME_PICTURE))
+			{
+				macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
+			}
+			return macroblock_modes;
+
+		case P_TYPE:
+			macroblock_modes = UBITS(6);
+
+			if (macroblock_modes == 0) return 0;   // error
+
+			tab = MB_P + (macroblock_modes >> 1);
+			DUMPBITS(tab->len);
+			macroblock_modes = tab->modes;
+
+			if (decoder.picture_structure != FRAME_PICTURE)
+			{
+				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
+				{
+					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
+				}
+
+				return macroblock_modes;
+			}
+			else if (decoder.frame_pred_frame_dct)
+			{
+				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
+					macroblock_modes |= MC_FRAME;
+
+				return macroblock_modes;
+			}
+			else
+			{
+				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
+				{
+					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
+				}
+
+				if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
+				{
+					macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
+				}
+
+				return macroblock_modes;
+			}
+
+		case B_TYPE:
+			macroblock_modes = UBITS(6);
+
+			if (macroblock_modes == 0) return 0;   // error
+
+			tab = MB_B + macroblock_modes;
+			DUMPBITS(tab->len);
+			macroblock_modes = tab->modes;
+
+			if (decoder.picture_structure != FRAME_PICTURE)
+			{
+				if (!(macroblock_modes & MACROBLOCK_INTRA))
+				{
+					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
+				}
+				return (macroblock_modes | (tab->len << 16)); // B type: VLC length is returned in bits 16 and up
+			}
+			else if (decoder.frame_pred_frame_dct)
+			{
+				/* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
+				macroblock_modes |= MC_FRAME;
+				return (macroblock_modes | (tab->len << 16));
+			}
+			else
+			{
+				if (macroblock_modes & MACROBLOCK_INTRA) goto intra; // intra: skip the motion-type bits, read only the DCT-type bit
+
+				macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
+
+				if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
+				{
+intra:
+					macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
+				}
+				return (macroblock_modes | (tab->len << 16));
+			}
+
+		case D_TYPE:
+			macroblock_modes = GETBITS(1);
+			// (Refraction) Suspect this should be GETBITS(2), since this is actually a 2-bit code.
+			// Only the single bit read above is consumed here; more may need dumping -- needs a game to test.
+			DevCon.Warning(" Rare MPEG command! ");
+			if (macroblock_modes == 0) return 0;   // error
+			return (MACROBLOCK_INTRA | (1 << 16));
+
+		default:
+			return 0;
+	}
+}
+
+__ri static int get_macroblock_address_increment() // decodes one address-increment VLC; returns (value | bits consumed << 16), or 0 on error
+{
+	const MBAtab *mba;
+
+	u16 code = UBITS(16);
+
+	if (code >= 4096)
+		mba = MBA.mba5 + (UBITS(5) - 2); // looked up by the leading 5 bits
+	else if (code >= 768)
+		mba = MBA.mba11 + (UBITS(11) - 24); // looked up by the leading 11 bits
+	else switch (UBITS(11))
+	{
+		case 8:		/* macroblock_escape */
+			DUMPBITS(11);
+			return 0xb0023; // upper half 0xb = the 11 bits just consumed
+
+		case 15:	/* macroblock_stuffing (MPEG1 only) */
+			if (decoder.mpeg1)
+			{
+				DUMPBITS(11);
+				return 0xb0022;
+			}
+			[[fallthrough]];
+
+		default:
+			return 0;//error
+	}
+
+	DUMPBITS(mba->len);
+
+	return ((mba->mba + 1) | (mba->len << 16)); // low half: table value + 1; upper half: code length in bits
+}
+
+__fi static int get_luma_dc_dct_diff() // decodes the luma DC difference: a size VLC followed by `size` value bits; consumes both
+{
+	int size;
+	int dc_diff;
+	u16 code = UBITS(5);
+
+	if (code < 31)
+	{
+		size = DCtable.lum0[code].size;
+		DUMPBITS(DCtable.lum0[code].len);
+
+		// 5 bits max
+	}
+	else
+	{
+		code = UBITS(9) - 0x1f0; // all five leading bits set: index the second table with the low 4 of 9 bits
+		size = DCtable.lum1[code].size;
+		DUMPBITS(DCtable.lum1[code].len);
+
+		// 9 bits max
+	}
+
+	if (size==0)
+		dc_diff = 0;
+	else
+	{
+		dc_diff = GETBITS(size);
+
+		// 6 for tab0 and 11 for tab1
+		if ((dc_diff & (1<<(size-1)))==0) // top bit clear marks a negative difference
+		  dc_diff-= (1<<size) - 1;
+	}
+
+	return dc_diff;
+}
+
+__fi static int get_chroma_dc_dct_diff() // decodes the chroma DC difference: a size VLC followed by `size` value bits; consumes both
+{
+	int size;
+	int dc_diff;
+	u16 code = UBITS(5);
+
+	if (code<31)
+	{
+		size = DCtable.chrom0[code].size;
+		DUMPBITS(DCtable.chrom0[code].len);
+	}
+	else
+	{
+		code = UBITS(10) - 0x3e0; // all five leading bits set: index the second table with the low 5 of 10 bits
+		size = DCtable.chrom1[code].size;
+		DUMPBITS(DCtable.chrom1[code].len);
+	}
+
+	if (size==0)
+		dc_diff = 0;
+	else
+	{
+		dc_diff = GETBITS(size);
+
+		if ((dc_diff & (1<<(size-1)))==0) // top bit clear marks a negative difference
+		{
+			dc_diff-= (1<<size) - 1;
+		}
+	}
+
+	return dc_diff;
+}
+
+__fi static void SATURATE(int& val) // clamps val in place to the range -2048..2047
+{
+	if ((u32)(val + 2048) > 4095) // one unsigned compare catches both val < -2048 and val > 2047
+		val = (val >> 31) ^ 2047; // negative -> -2048, positive -> 2047
+}
+
+__ri static bool get_intra_block() // decodes the AC coefficients of one intra block into decoder.DCTblock; false = out of input, progress saved in ipu_cmd.pos[4]/pos[5]
+{
+	const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
+	const u8 (&quant_matrix)[64] = decoder.iq;
+	int quantizer_scale = decoder.quantizer_scale;
+	s16 * dest = decoder.DCTblock;
+	u16 code;
+
+	/* decode AC coefficients */
+  for (int i=1 + ipu_cmd.pos[4]; ; i++) // resumes after the coefficient index saved in pos[4]
+  {
+	  switch (ipu_cmd.pos[5])
+	  {
+	  case 0: // stage 0: look up the next run/level VLC
+		if (!GETWORD())
+		{
+		  ipu_cmd.pos[4] = i - 1;
+		  return false;
+		}
+
+		code = UBITS(16);
+
+		if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
+		{
+		  tab = &DCT.next[(code >> 12) - 4];
+		}
+		else if (code >= 1024)
+		{
+			if (decoder.intra_vlc_format && !decoder.mpeg1)
+			{
+				tab = &DCT.tab0a[(code >> 8) - 4];
+			}
+			else
+			{
+				tab = &DCT.tab0[(code >> 8) - 4];
+			}
+		}
+		else if (code >= 512)
+		{
+			if (decoder.intra_vlc_format && !decoder.mpeg1)
+			{
+				tab = &DCT.tab1a[(code >> 6) - 8];
+			}
+			else
+			{
+				tab = &DCT.tab1[(code >> 6) - 8];
+			}
+		}
+
+		// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
+		// that should use a single unrolled DCT table instead of five separate tables used
+		// here.  Multiple conditional statements are very slow, while modern CPU data caches
+		// have lots of room to spare.
+
+		else if (code >= 256)
+		{
+			tab = &DCT.tab2[(code >> 4) - 16];
+		}
+		else if (code >= 128)
+		{
+			tab = &DCT.tab3[(code >> 3) - 16];
+		}
+		else if (code >= 64)
+		{
+			tab = &DCT.tab4[(code >> 2) - 16];
+		}
+		else if (code >= 32)
+		{
+			tab = &DCT.tab5[(code >> 1) - 16];
+		}
+		else if (code >= 16)
+		{
+			tab = &DCT.tab6[code - 16];
+		}
+		else // code < 16 matches no table: stop decoding and report the block as done
+		{
+		  ipu_cmd.pos[4] = 0;
+		  return true;
+		}
+
+		DUMPBITS(tab->len);
+
+		if (tab->run==64) /* end_of_block */
+		{
+			ipu_cmd.pos[4] = 0;
+			return true;
+		}
+
+		i += (tab->run == 65) ? GETBITS(6) : tab->run; // escape (run == 65) carries an explicit 6-bit run
+		if (i >= 64)
+		{
+			ipu_cmd.pos[4] = 0;
+			return true;
+		}
+		[[fallthrough]];
+
+	  case 1: // stage 1: read the level/sign and store the coefficient
+	  {
+			if (!GETWORD())
+			{
+				ipu_cmd.pos[4] = i - 1;
+				ipu_cmd.pos[5] = 1; // re-enter at stage 1; the file-static `tab` still holds the decoded entry
+				return false;
+			}
+
+			uint j = scan[i];
+			int val;
+
+			if (tab->run==65) /* escape */
+			{
+				if(!decoder.mpeg1)
+				{
+				  val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
+				  DUMPBITS(12);
+				}
+				else
+				{
+				  val = SBITS(8);
+				  DUMPBITS(8);
+
+				  if (!(val & 0x7f)) // low 7 bits clear: a further 8 bits follow
+				  {
+					val = GETBITS(8) + 2 * val;
+				  }
+
+				  val = (val * quantizer_scale * quant_matrix[i]) >> 4;
+				  val = (val + ~ (((s32)val) >> 31)) | 1;
+				}
+			}
+			else
+			{
+				val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+				if(decoder.mpeg1)
+				{
+					/* oddification */
+					val = (val - 1) | 1;
+				}
+
+				/* if (bitstream_get (1)) val = -val; */
+				int bit1 = SBITS(1); // 0 or -1
+				val = (val ^ bit1) - bit1; // negates val when bit1 is -1
+				DUMPBITS(1);
+			}
+
+			SATURATE(val);
+			dest[j] = val;
+			ipu_cmd.pos[5] = 0;
+		}
+	 }
+  }
+
+  ipu_cmd.pos[4] = 0; // not reached: the loop above only exits through return
+  return true;
+}
+
+__ri static bool get_non_intra_block(int * last) // decodes the coefficients of one non-intra block into decoder.DCTblock; false = out of input, progress saved in ipu_cmd.pos[4]/pos[5]
+{
+	int i;
+	int j;
+	int val;
+	const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
+	const u8 (&quant_matrix)[64] = decoder.niq;
+	int quantizer_scale = decoder.quantizer_scale;
+	s16 * dest = decoder.DCTblock;
+	u16 code;
+
+	/* decode AC coefficients */
+	for (i= ipu_cmd.pos[4] ; ; i++) // resumes at the coefficient index saved in pos[4]
+	{
+		switch (ipu_cmd.pos[5])
+		{
+		case 0: // stage 0: look up the next run/level VLC
+			if (!GETWORD())
+			{
+				ipu_cmd.pos[4] = i;
+				return false;
+			}
+
+			code = UBITS(16);
+
+			if (code >= 16384)
+			{
+				if (i==0)
+				{
+					tab = &DCT.first[(code >> 12) - 4]; // the first coefficient uses its own table
+				}
+				else
+				{
+					tab = &DCT.next[(code >> 12)- 4];
+				}
+			}
+			else if (code >= 1024)
+			{
+				tab = &DCT.tab0[(code >> 8) - 4];
+			}
+			else if (code >= 512)
+			{
+				tab = &DCT.tab1[(code >> 6) - 8];
+			}
+
+			// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
+			// that should use a single unrolled DCT table instead of five separate tables used
+			// here.  Multiple conditional statements are very slow, while modern CPU data caches
+			// have lots of room to spare.
+
+			else if (code >= 256)
+			{
+				tab = &DCT.tab2[(code >> 4) - 16];
+			}
+			else if (code >= 128)
+			{
+				tab = &DCT.tab3[(code >> 3) - 16];
+			}
+			else if (code >= 64)
+			{
+				tab = &DCT.tab4[(code >> 2) - 16];
+			}
+			else if (code >= 32)
+			{
+				tab = &DCT.tab5[(code >> 1) - 16];
+			}
+			else if (code >= 16)
+			{
+				tab = &DCT.tab6[code - 16];
+			}
+			else // code < 16 matches no table. NOTE(review): *last is not written on this path
+			{
+				ipu_cmd.pos[4] = 0;
+				return true;
+			}
+
+			DUMPBITS(tab->len);
+
+			if (tab->run==64) /* end_of_block */
+			{
+				*last = i;
+				ipu_cmd.pos[4] = 0;
+				return true;
+			}
+
+			i += (tab->run == 65) ? GETBITS(6) : tab->run; // escape (run == 65) carries an explicit 6-bit run
+			if (i >= 64)
+			{
+				*last = i;
+				ipu_cmd.pos[4] = 0;
+				return true;
+			}
+			[[fallthrough]];
+
+		case 1: // stage 1: read the level/sign and store the coefficient
+			if (!GETWORD())
+			{
+			  ipu_cmd.pos[4] = i;
+			  ipu_cmd.pos[5] = 1; // re-enter at stage 1; the file-static `tab` still holds the decoded entry
+			  return false;
+			}
+
+			j = scan[i];
+
+			if (tab->run==65) /* escape */
+			{
+				if (!decoder.mpeg1)
+				{
+					val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
+					DUMPBITS(12);
+				}
+				else
+				{
+				  val = SBITS(8);
+				  DUMPBITS(8);
+
+				  if (!(val & 0x7f)) // low 7 bits clear: a further 8 bits follow
+				  {
+					val = GETBITS(8) + 2 * val;
+				  }
+
+				  val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
+				  val = (val + ~ (((s32)val) >> 31)) | 1;
+				}
+			}
+			else
+			{
+				int bit1 = SBITS(1); // 0 or -1
+				val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
+				val = (val ^ bit1) - bit1; // negates val when bit1 is -1
+				DUMPBITS(1);
+			}
+
+			SATURATE(val);
+			dest[j] = val;
+			ipu_cmd.pos[5] = 0;
+		}
+	}
+
+	ipu_cmd.pos[4] = 0; // not reached: the loop above only exits through return
+	return true;
+}
+
+__ri static bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip) // decodes one intra block (DC predictor index cc) and writes its pixels to dest; false = needs more input
+{
+	if (!skip || ipu_cmd.pos[3]) // decode the DC term, unless resuming a block whose DC term was already read
+	{
+		ipu_cmd.pos[3] = 0;
+		if (!GETWORD())
+		{
+			ipu_cmd.pos[3] = 1; // remember that the DC term is still pending for the retry
+			return false;
+		}
+
+		/* Get the intra DC coefficient and inverse quantize it */
+		if (cc == 0)
+			decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
+		else
+			decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
+
+		decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
+	}
+
+	if (!get_intra_block())
+	{
+		return false;
+	}
+
+	IDCT_Copy(decoder.DCTblock, dest, stride); // also zeroes decoder.DCTblock for the next block
+
+	return true;
+}
+
+__ri static bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip) // decodes one non-intra block and stores its IDCT into dest; false = needs more input
+{
+	int last = 0; // initialized: get_non_intra_block() can return true without writing *last (no VLC table match); 0 selects the general IDCT path
+
+	if (!skip)
+	{
+		memzero_sse_a(decoder.DCTblock); // fresh block; when resuming (skip) the partially decoded coefficients are kept
+	}
+
+	if (!get_non_intra_block(&last))
+	{
+		return false;
+	}
+
+	IDCT_Add(last, decoder.DCTblock, dest, stride);
+
+	return true;
+}
+
+__fi static void finishmpeg2sliceIDEC() // end-of-IDEC bookkeeping
+{
+	ipuRegs.ctrl.SCD = 0;
+	coded_block_pattern = decoder.coded_block_pattern; // copy the decoder's pattern to the global of the same name
+}
+
+__ri static bool mpeg2sliceIDEC()
+{
+	u16 code;
+
+	switch (ipu_cmd.pos[0])
+	{
+	case 0:
+		decoder.dc_dct_pred[0] =
+		decoder.dc_dct_pred[1] =
+		decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
+
+		ipuRegs.top = 0;
+		ipuRegs.ctrl.ECD = 0;
+		[[fallthrough]];
+
+	case 1:
+		ipu_cmd.pos[0] = 1;
+		if (!BitstreamInit())
+		{
+			return false;
+		}
+		[[fallthrough]];
+
+	case 2:
+		ipu_cmd.pos[0] = 2;
+		while (1)
+		{
+			// IPU0 isn't ready for data, so let's wait for it to be
+			if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[1] <= 2)
+			{
+				return false;
+			}
+			macroblock_8& mb8 = decoder.mb8;
+			macroblock_rgb16& rgb16 = decoder.rgb16;
+			macroblock_rgb32& rgb32 = decoder.rgb32;
+
+			int DCT_offset, DCT_stride;
+			const MBAtab * mba;
+
+			switch (ipu_cmd.pos[1])
+			{
+			case 0:
+				decoder.macroblock_modes = GetMacroblockModes();
+
+				if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
+				{
+					const int quantizer_scale_code = GETBITS(5);
+					if (decoder.q_scale_type)
+						decoder.quantizer_scale = non_linear_quantizer_scale[quantizer_scale_code];
+					else
+						decoder.quantizer_scale = quantizer_scale_code << 1;
+				}
+
+				decoder.coded_block_pattern = 0x3F;//all 6 blocks
+				memzero_sse_a(mb8);
+				memzero_sse_a(rgb32);
+				[[fallthrough]];
+
+			case 1:
+				ipu_cmd.pos[1] = 1;
+
+				if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
+				{
+					DCT_offset = decoder_stride;
+					DCT_stride = decoder_stride * 2;
+				}
+				else
+				{
+					DCT_offset = decoder_stride * 8;
+					DCT_stride = decoder_stride;
+				}
+
+				switch (ipu_cmd.pos[2])
+				{
+				case 0:
+				case 1:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
+					{
+						ipu_cmd.pos[2] = 1;
+						return false;
+					}
+					[[fallthrough]];
+
+				case 2:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
+					{
+						ipu_cmd.pos[2] = 2;
+						return false;
+					}
+					[[fallthrough]];
+
+				case 3:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
+					{
+						ipu_cmd.pos[2] = 3;
+						return false;
+					}
+					[[fallthrough]];
+
+				case 4:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
+					{
+						ipu_cmd.pos[2] = 4;
+						return false;
+					}
+					[[fallthrough]];
+
+				case 5:
+					if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
+					{
+						ipu_cmd.pos[2] = 5;
+						return false;
+					}
+					[[fallthrough]];
+
+				case 6:
+					if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
+					{
+						ipu_cmd.pos[2] = 6;
+						return false;
+					}
+					break;
+
+				jNO_DEFAULT;
+				}
+
+				// Send The MacroBlock via DmaIpuFrom
+				ipu_csc(mb8, rgb32, decoder.sgn);
+
+				if (decoder.ofm == 0)
+					decoder.SetOutputTo(rgb32);
+				else
+				{
+					ipu_dither(rgb32, rgb16, decoder.dte);
+					decoder.SetOutputTo(rgb16);
+				}
+				[[fallthrough]];
+
+			case 2:
+			{
+
+				pxAssert(decoder.ipu0_data > 0);
+
+				uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
+				decoder.AdvanceIpuDataBy(read);
+
+				if (decoder.ipu0_data != 0)
+				{
+					// IPU FIFO filled up -- Will have to finish transferring later.
+					ipu_cmd.pos[1] = 2;
+					return false;
+				}
+
+				mbaCount = 0;
+				if (read)
+				{
+					ipu_cmd.pos[1] = 3;
+					return false;
+				}
+			}
+				[[fallthrough]];
+
+			case 3:
+				while (1)
+				{
+					if (!GETWORD())
+					{
+						ipu_cmd.pos[1] = 3;
+						return false;
+					}
+
+					code = UBITS(16);
+					if (code >= 0x1000)
+					{
+						mba = MBA.mba5 + (UBITS(5) - 2);
+						break;
+					}
+					else if (code >= 0x0300)
+					{
+						mba = MBA.mba11 + (UBITS(11) - 24);
+						break;
+					}
+					else switch (UBITS(11))
+					{
+						case 8:		/* macroblock_escape */
+							mbaCount += 33;
+							[[fallthrough]];
+
+						case 15:	/* macroblock_stuffing (MPEG1 only) */
+							DUMPBITS(11);
+							continue;
+
+						default:	/* end of slice/frame, or error? */
+						{
+							goto finish_idec;
+						}
+					}
+				}
+
+				DUMPBITS(mba->len);
+				mbaCount += mba->mba;
+
+				if (mbaCount)
+				{
+					decoder.dc_dct_pred[0] =
+					decoder.dc_dct_pred[1] =
+					decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
+				}
+				[[fallthrough]];
+
+			case 4:
+				if (!GETWORD())
+				{
+					ipu_cmd.pos[1] = 4;
+					return false;
+				}
+				break;
+
+			jNO_DEFAULT;
+			}
+
+			ipu_cmd.pos[1] = 0;
+			ipu_cmd.pos[2] = 0;
+		}
+
+finish_idec:
+		finishmpeg2sliceIDEC();
+		[[fallthrough]];
+
+	case 3:
+	{
+		u8 bit8;
+		u32 start_check;
+		if (!getBits8((u8*)&bit8, 0))
+		{
+			ipu_cmd.pos[0] = 3;
+			return false;
+		}
+
+		if (bit8 == 0)
+		{
+			g_BP.Align();
+			do
+			{
+				if (!g_BP.FillBuffer(24))
+				{
+					ipu_cmd.pos[0] = 3;
+					return false;
+				}
+				start_check = UBITS(24);
+				if (start_check != 0)
+				{
+					if (start_check == 1)
+					{
+						ipuRegs.ctrl.SCD = 1;
+					}
+					else
+					{
+						ipuRegs.ctrl.ECD = 1;
+					}
+					break;
+				}
+				DUMPBITS(8);
+			} while (1);
+		}
+	}
+		[[fallthrough]];
+
+	case 4:
+		if (!getBits32((u8*)&ipuRegs.top, 0))
+		{
+			ipu_cmd.pos[0] = 4;
+			return false;
+		}
+
+		ipuRegs.top = BigEndian(ipuRegs.top);
+		break;
+
+	jNO_DEFAULT;
+	}
+
+	return true;
+}
+
+__fi static bool mpeg2_slice() // Resumable: returns false after saving a resume point in ipu_cmd.pos[]; returns true after case 5 completes.
+{
+	int DCT_offset, DCT_stride; // assigned in case 2, before any block is decoded
+
+	macroblock_8& mb8 = decoder.mb8;
+	macroblock_16& mb16 = decoder.mb16;
+
+	switch (ipu_cmd.pos[0]) // outer resume point; pos[1] is the per-block resume point used inside case 2
+	{
+	case 0: // entry with no saved resume point: reset predictors, registers and buffers
+		if (decoder.dcr) // DC reset flag set: reload all three DC predictors
+		{
+			decoder.dc_dct_pred[0] =
+			decoder.dc_dct_pred[1] =
+			decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision; // reset value scales with intra_dc_precision
+		}
+
+		ipuRegs.ctrl.ECD = 0;
+		ipuRegs.top = 0;
+		memzero_sse_a(mb8);
+		memzero_sse_a(mb16);
+		[[fallthrough]];
+
+	case 1:
+		if (!BitstreamInit()) // helper reported failure: come back to case 1 on the next call
+		{
+			ipu_cmd.pos[0] = 1;
+			return false;
+		}
+		[[fallthrough]];
+
+	case 2:
+		ipu_cmd.pos[0] = 2; // every early return inside this case resumes here
+
+		// IPU0 isn't ready for data, so let's wait for it to be
+		if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[0] <= 3) // NOTE(review): pos[0] was just set to 2, so the "<= 3" term is always true here
+		{
+			return false;
+		}
+
+		if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
+		{
+			DCT_offset = decoder_stride; // lower pair of Y blocks starts one stride in
+			DCT_stride = decoder_stride * 2; // consecutive block rows are two strides apart
+		}
+		else
+		{
+			DCT_offset = decoder_stride * 8; // lower pair of Y blocks starts eight strides in
+			DCT_stride = decoder_stride;
+		}
+
+		if (decoder.macroblock_modes & MACROBLOCK_INTRA)
+		{
+			switch(ipu_cmd.pos[1]) // resume at the block that previously ran out of input
+			{
+			case 0:
+				decoder.coded_block_pattern = 0x3F; // all 6 blocks (4 Y, Cb, Cr)
+				[[fallthrough]];
+
+			case 1: // Y block at offset 0; the last argument is true only when resuming directly at this block (presumably "continue a partial decode" -- confirm in slice_intra_DCT)
+				if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
+				{
+					ipu_cmd.pos[1] = 1;
+					return false;
+				}
+				[[fallthrough]];
+
+			case 2: // Y block at offset +8
+				if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
+				{
+					ipu_cmd.pos[1] = 2;
+					return false;
+				}
+				[[fallthrough]];
+
+			case 3: // Y block at offset +DCT_offset
+				if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
+				{
+					ipu_cmd.pos[1] = 3;
+					return false;
+				}
+				[[fallthrough]];
+
+			case 4: // Y block at offset +DCT_offset + 8
+				if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
+				{
+					ipu_cmd.pos[1] = 4;
+					return false;
+				}
+				[[fallthrough]];
+
+			case 5: // Cb block (component 1), half the luma stride
+				if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
+				{
+					ipu_cmd.pos[1] = 5;
+					return false;
+				}
+				[[fallthrough]];
+
+			case 6: // Cr block (component 2), half the luma stride
+				if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
+				{
+					ipu_cmd.pos[1] = 6;
+					return false;
+				}
+				break;
+
+			jNO_DEFAULT;
+			}
+
+			// Copy macroblock8 to macroblock16 - without sign extension.
+			// Manually inlined due to MSVC refusing to inline the SSE-optimized version.
+			{
+				const u8	*s = (const u8*)&mb8;
+				u16			*d = (u16*)&mb16;
+
+				//Y  bias	- 16 * 16
+				//Cr bias	- 8 * 8
+				//Cb bias	- 8 * 8
+
+				__m128i zeroreg = _mm_setzero_si128(); // interleaving with zero widens each byte to an unsigned 16-bit value
+
+				for (uint i = 0; i < (256+64+64) / 32; ++i) // 256 Y + 64 Cb + 64 Cr source bytes, 32 bytes per iteration
+				{
+					// scalar equivalent, per byte: *d++ = *s++;
+					__m128i woot1 = _mm_load_si128((__m128i*)s);
+					__m128i woot2 = _mm_load_si128((__m128i*)s+1);
+					_mm_store_si128((__m128i*)d,	_mm_unpacklo_epi8(woot1, zeroreg));
+					_mm_store_si128((__m128i*)d+1,	_mm_unpackhi_epi8(woot1, zeroreg));
+					_mm_store_si128((__m128i*)d+2,	_mm_unpacklo_epi8(woot2, zeroreg));
+					_mm_store_si128((__m128i*)d+3,	_mm_unpackhi_epi8(woot2, zeroreg));
+					s += 32; // 32 source bytes consumed
+					d += 32; // 32 u16 destinations written
+				}
+			}
+		}
+		else
+		{
+			if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
+			{
+				switch(ipu_cmd.pos[1]) // resume at the block that previously ran out of input
+				{
+				case 0:
+					{
+						// Get coded block pattern
+						const CBPtab* tab;
+						u16 code = UBITS(16);
+
+						if (code >= 0x2000) // pick the 7-bit or the 9-bit lookup table from the leading bits
+							tab = CBP_7 + (UBITS(7) - 16);
+						else
+							tab = CBP_9 + UBITS(9);
+
+						DUMPBITS(tab->len);
+						decoder.coded_block_pattern = tab->cbp;
+					}
+					[[fallthrough]];
+
+				case 1: // bit 0x20 of the pattern: Y block at offset 0
+					if (decoder.coded_block_pattern & 0x20)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
+						{
+							ipu_cmd.pos[1] = 1;
+							return false;
+						}
+					}
+					[[fallthrough]];
+
+				case 2: // bit 0x10: Y block at offset +8
+					if (decoder.coded_block_pattern & 0x10)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
+						{
+							ipu_cmd.pos[1] = 2;
+							return false;
+						}
+					}
+					[[fallthrough]];
+
+				case 3: // bit 0x08: Y block at offset +DCT_offset
+					if (decoder.coded_block_pattern & 0x08)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
+						{
+							ipu_cmd.pos[1] = 3;
+							return false;
+						}
+					}
+					[[fallthrough]];
+
+				case 4: // bit 0x04: Y block at offset +DCT_offset + 8
+					if (decoder.coded_block_pattern & 0x04)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
+						{
+							ipu_cmd.pos[1] = 4;
+							return false;
+						}
+					}
+					[[fallthrough]];
+
+				case 5: // bit 0x02: Cb block
+					if (decoder.coded_block_pattern & 0x2)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
+						{
+							ipu_cmd.pos[1] = 5;
+							return false;
+						}
+					}
+					[[fallthrough]];
+
+				case 6: // bit 0x01: Cr block
+					if (decoder.coded_block_pattern & 0x1)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
+						{
+							ipu_cmd.pos[1] = 6;
+							return false;
+						}
+					}
+					break;
+
+				jNO_DEFAULT;
+				}
+			}
+		}
+
+		// Send The MacroBlock via DmaIpuFrom
+		ipuRegs.ctrl.SCD = 0;
+		coded_block_pattern = decoder.coded_block_pattern; // copy into the file-scope global of the same name
+
+		decoder.SetOutputTo(mb16); // records mb16's offset and size so case 3 can stream it out
+		[[fallthrough]];
+
+	case 3:
+	{
+		pxAssert(decoder.ipu0_data > 0);
+
+		uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data); // despite the name, this is the amount the output FIFO accepted
+		decoder.AdvanceIpuDataBy(read);
+
+		if (decoder.ipu0_data != 0)
+		{
+			// IPU FIFO filled up -- Will have to finish transferring later.
+			ipu_cmd.pos[0] = 3;
+			return false;
+		}
+
+		mbaCount = 0;
+		if (read) // something was written on this pass: yield and continue from case 4 on the next call
+		{
+			ipu_cmd.pos[0] = 4;
+			return false;
+		}
+	}
+		[[fallthrough]];
+
+	case 4: // look at the next byte; if it is zero, scan for the next non-zero 24-bit value
+	{
+		u8 bit8;
+		u32 start_check;
+		if (!getBits8((u8*)&bit8, 0)) // second argument 0: presumably "do not advance", as with getBits32 -- confirm
+		{
+			ipu_cmd.pos[0] = 4;
+			return false;
+		}
+
+		if (bit8 == 0)
+		{
+			g_BP.Align();
+			do
+			{
+				if (!g_BP.FillBuffer(24)) // not enough input buffered for a 24-bit look-ahead yet
+				{
+					ipu_cmd.pos[0] = 4;
+					return false;
+				}
+				start_check = UBITS(24);
+				if (start_check != 0)
+				{
+					if (start_check == 1) // value 0x000001 sets SCD
+					{
+						ipuRegs.ctrl.SCD = 1;
+					}
+					else // any other non-zero value sets ECD
+					{
+						ipuRegs.ctrl.ECD = 1;
+					}
+					break;
+				}
+				DUMPBITS(8); // all 24 bits were zero: drop 8 bits and look again
+			} while (1);
+		}
+	}
+		[[fallthrough]];
+
+	case 5: // load the next 32 bits into ipuRegs.top (advance flag 0), then byte-swap them
+		if (!getBits32((u8*)&ipuRegs.top, 0))
+		{
+			ipu_cmd.pos[0] = 5;
+			return false;
+		}
+
+		ipuRegs.top = BigEndian(ipuRegs.top);
+		break;
+	} // NOTE(review): unlike the inner switches, this outer switch has no jNO_DEFAULT
+
+	return true;
+}
+
+
 //////////////////////////////////////////////////////
 // IPU Commands (exec on worker thread only)
 
-static __fi bool ipuVDEC(u32 val)
+__fi static bool ipuVDEC(u32 val)
 {
 	static int count = 0;
 	if (count++ > 5) {
@@ -39,7 +1594,7 @@ static __fi bool ipuVDEC(u32 val)
 	switch (ipu_cmd.pos[0])
 	{
 		case 0:
-			if (!bitstream_init()) return false;
+			if (!BitstreamInit()) return false;
 
 			switch ((val >> 26) & 3)
 			{
@@ -51,15 +1606,42 @@ static __fi bool ipuVDEC(u32 val)
 				case 1://Macroblock Type
 					decoder.frame_pred_frame_dct = 1;
 					decoder.coding_type = ipuRegs.ctrl.PCT > 0 ? ipuRegs.ctrl.PCT : 1; // Kaiketsu Zorro Mezase doesn't set a Picture type, seems happy with I
-					ipuRegs.cmd.DATA = get_macroblock_modes();
+					ipuRegs.cmd.DATA = GetMacroblockModes();
 					break;
 
 				case 2://Motion Code
-					ipuRegs.cmd.DATA = get_motion_delta(0);
+					{
+						const u16 code = UBITS(16);
+						if ((code & 0x8000))
+						{
+							DUMPBITS(1);
+							ipuRegs.cmd.DATA = 0x00010000;
+						}
+						else
+						{
+							const MVtab* tab;
+							if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
+								tab = MV_4 + UBITS(4);
+							else
+								tab = MV_10 + UBITS(10);
+
+							const int delta = tab->delta + 1;
+							DUMPBITS(tab->len);
+
+							const int sign = SBITS(1);
+							DUMPBITS(1);
+
+							ipuRegs.cmd.DATA = (((delta ^ sign) - sign) | (tab->len << 16));
+						}
+					}
 					break;
 
 				case 3://DMVector
-					ipuRegs.cmd.DATA = get_dmv();
+					{
+						const DMVtab* tab = DMV_2 + UBITS(2);
+						DUMPBITS(tab->len);
+						ipuRegs.cmd.DATA = (tab->dmv | (tab->len << 16));
+					}
 					break;
 
 				jNO_DEFAULT
@@ -100,7 +1682,7 @@ static __fi bool ipuVDEC(u32 val)
 	return false;
 }
 
-static __ri bool ipuFDEC(u32 val)
+__ri static bool ipuFDEC(u32 val)
 {
 	if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false;
 
@@ -185,7 +1767,7 @@ static bool ipuSETVQ(u32 val)
 }
 
 // IPU Transfers are split into 8Qwords so we need to send ALL the data
-static __ri bool ipuCSC(tIPU_CMD_CSC csc)
+__ri static bool ipuCSC(tIPU_CMD_CSC csc)
 {
 	csc.log_from_YCbCr();
 
@@ -217,7 +1799,7 @@ static __ri bool ipuCSC(tIPU_CMD_CSC csc)
 	return true;
 }
 
-static __ri bool ipuPACK(tIPU_CMD_CSC csc)
+__ri static bool ipuPACK(tIPU_CMD_CSC csc)
 {
 	csc.log_from_RGB32();
 
@@ -254,7 +1836,7 @@ static __ri bool ipuPACK(tIPU_CMD_CSC csc)
 //  CORE Functions (referenced from MPEG library)
 // --------------------------------------------------------------------------------------
 
-__fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
+__fi static void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
 {
 	int i;
 	u8* p = (u8*)&rgb32;
@@ -288,7 +1870,7 @@ __fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
 	}
 }
 
-__fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
+__fi static void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
 {
 	const auto closest_index = [&](int i, int j) {
 		u8 index = 0;
diff --git a/pcsx2/IPU/IPU_MultiISA.h b/pcsx2/IPU/IPU_MultiISA.h
index d671e9a6f0..b56e07ae5c 100644
--- a/pcsx2/IPU/IPU_MultiISA.h
+++ b/pcsx2/IPU/IPU_MultiISA.h
@@ -15,13 +15,163 @@
 
 #pragma once
 
+#include "IPU/IPU.h"
+#include "IPU/mpeg2_vlc.h"
 #include "GS/MultiISA.h"
-#include "mpeg2lib/Mpeg.h"
 
-MULTI_ISA_DEF(void IPUWorker();)
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#ifdef _MSC_VER
+#define BigEndian(in) _byteswap_ulong(in)
+#else
+#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap...
+#endif
+
+#ifdef _MSC_VER
+#define BigEndian64(in) _byteswap_uint64(in)
+#else
+#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap...
+#endif
+
+struct macroblock_8{ // one macroblock as 8-bit planes: 16x16 Y, 8x8 Cb, 8x8 Cr (384 bytes)
+	u8 Y[16][16];		//0 (component index the slice decoders pass to slice_intra_DCT)
+	u8 Cb[8][8];		//1
+	u8 Cr[8][8];		//2
+};
+
+struct macroblock_16{ // same plane layout as macroblock_8, with 16-bit signed samples
+	s16 Y[16][16];			//0 (component index, as in macroblock_8)
+	s16 Cb[8][8];			//1
+	s16 Cr[8][8];			//2
+};
+
+struct macroblock_rgb32{ // 16x16 pixels, four 8-bit channels each
+	struct {
+		u8 r, g, b, a; // stored in this order, one byte per channel
+	} c[16][16];
+};
+
+struct rgb16_t{ // one pixel as bit-fields of a u16
+	u16 r:5, g:5, b:5, a:1; // 5 + 5 + 5 + 1 = 16 bits
+};
+
+struct macroblock_rgb16{ // 16x16 pixels of rgb16_t
+	rgb16_t	c[16][16];
+};
+
+struct decoder_t { // decoder state plus the macroblock buffers that are streamed to the IPU output FIFO
+	/* first, state that carries information from one macroblock to the */
+	/* next inside a slice, and is never used outside of mpeg2_slice() */
+
+	/* DCT coefficients - should be kept aligned ! */
+	s16 DCTblock[64];
+
+	u8 niq[64];			//non-intraquant matrix (sequence header)
+	u8 iq[64];			//intraquant matrix (sequence header)
+
+	macroblock_8 mb8;		// also the base address that ipu0_idx is measured from (see SetOutputTo / GetIpuDataPtr)
+	macroblock_16 mb16;
+	macroblock_rgb32 rgb32;
+	macroblock_rgb16 rgb16;
+
+	uint ipu0_data;		// amount of data in the output macroblock (in QWC)
+	uint ipu0_idx;		// next 16-byte unit to send, counted from the start of mb8
+
+	int quantizer_scale;
+
+	/* now non-slice-specific information */
+
+	/* picture header stuff */
+
+	/* what type of picture this is (I, P, B, D) */
+	int coding_type;
+
+	/* picture coding extension stuff */
+
+	/* predictor for DC coefficients in intra blocks */
+	s16 dc_dct_pred[3];
+
+	/* quantization factor for intra dc coefficients */
+	int intra_dc_precision;
+	/* top/bottom/both fields */
+	int picture_structure;
+	/* bool to indicate all predictions are frame based */
+	int frame_pred_frame_dct;
+	/* bool to indicate whether intra blocks have motion vectors */
+	/* (for concealment) */
+	int concealment_motion_vectors;
+	/* bit to indicate which quantization table to use */
+	int q_scale_type;
+	/* bool to use different vlc tables */
+	int intra_vlc_format;
+	/* used for DMV MC */
+	int top_field_first;
+	// Pseudo Sign Offset
+	int sgn;
+	// Dither Enable
+	int dte;
+	// Output Format
+	int ofm;
+	// Macroblock type
+	int macroblock_modes;
+	// DC Reset
+	int dcr;
+	// Coded block pattern
+	int coded_block_pattern;
+
+	/* stuff derived from bitstream */
+
+	/* the zigzag scan we're supposed to be using, true for alt, false for normal */
+	bool scantype;
+
+	int mpeg1;
+
+	template< typename T >
+	void SetOutputTo( T& obj ) // select obj as the pending output; the offset is taken from mb8, so obj should be mb8 or a member laid out after it
+	{
+		uint mb_offset = ((uptr)&obj - (uptr)&mb8); // byte distance from mb8 to obj
+		pxAssume( (mb_offset & 15) == 0 ); // obj must sit at a multiple of 16 bytes from mb8
+		ipu0_idx	= mb_offset / 16; // start position, in 16-byte units
+		ipu0_data	= sizeof(obj)/16; // total amount to send, in 16-byte units
+	}
+
+	u128* GetIpuDataPtr() // address of the next 16-byte unit still to be sent
+	{
+		return ((u128*)&mb8) + ipu0_idx;
+	}
+
+	void AdvanceIpuDataBy(uint amt) // account for amt 16-byte units having been sent
+	{
+		pxAssertMsg(ipu0_data>=amt, "IPU FIFO Overflow on advance!" );
+		ipu0_idx  += amt;
+		ipu0_data -= amt;
+	}
+};
+
+alignas(16) extern decoder_t decoder;
+alignas(16) extern tIPU_BP g_BP;
+
+MULTI_ISA_DEF(
+	extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
+
+	void IPUWorker();
+)
 
 // Quantization matrix
 extern rgb16_t g_ipu_vqclut[16]; //clut conversion table
 extern u16 g_ipu_thresh[2]; //thresholds for color conversions
 
 alignas(16) extern u8 g_ipu_indx4[16*16/2];
+alignas(16) extern const int non_linear_quantizer_scale[32];
+extern int coded_block_pattern;
+
+struct mpeg2_scan_pack // two 64-entry tables, one per 8x8 coefficient position; instantiated as the extern `mpeg2_scan`
+{
+	u8 norm[64]; // presumably the normal zigzag scan order (cf. decoder_t::scantype == false) -- confirm
+	u8 alt[64]; // presumably the alternate scan order (cf. decoder_t::scantype == true) -- confirm
+};
+
+alignas(16) extern const std::array<u8, 1024> g_idct_clip_lut;
+alignas(16) extern const mpeg2_scan_pack mpeg2_scan;
diff --git a/pcsx2/IPU/IPUdither.cpp b/pcsx2/IPU/IPUdither.cpp
index a2d1f07877..a7bf620590 100644
--- a/pcsx2/IPU/IPUdither.cpp
+++ b/pcsx2/IPU/IPUdither.cpp
@@ -16,12 +16,10 @@
 #include "PrecompiledHeader.h"
 #include "Common.h"
 
-#include "IPU.h"
-#include "IPUdma.h"
-#include "yuv2rgb.h"
-#include "mpeg2lib/Mpeg.h"
-
-#include "GS/MultiISA.h"
+#include "IPU/IPU.h"
+#include "IPU/IPUdma.h"
+#include "IPU/yuv2rgb.h"
+#include "IPU/IPU_MultiISA.h"
 
 MULTI_ISA_UNSHARED_START
 
diff --git a/pcsx2/IPU/IPUdma.cpp b/pcsx2/IPU/IPUdma.cpp
index e7e9f83d48..132b65859c 100644
--- a/pcsx2/IPU/IPUdma.cpp
+++ b/pcsx2/IPU/IPUdma.cpp
@@ -15,9 +15,9 @@
 
 #include "PrecompiledHeader.h"
 #include "Common.h"
-#include "IPU.h"
+#include "IPU/IPU.h"
 #include "IPU/IPUdma.h"
-#include "mpeg2lib/Mpeg.h"
+#include "IPU/IPU_MultiISA.h"
 
 IPUStatus IPU1Status;
 bool CommandExecuteQueued;
diff --git a/pcsx2/IPU/mpeg2_vlc.h b/pcsx2/IPU/mpeg2_vlc.h
new file mode 100644
index 0000000000..997c8d68d4
--- /dev/null
+++ b/pcsx2/IPU/mpeg2_vlc.h
@@ -0,0 +1,485 @@
+/*
+ * vlc.h
+ * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ * Modified by Florin for PCSX2 emu
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+/* NOTE: While part of this header is originally from libmpeg2, which is GPL - licensed,
+ * it's not substantial and does not contain any functions, therefore can be argued
+ * not to be a derived work. See http://lkml.iu.edu/hypermail/linux/kernel/0301.1/0362.html
+ * The constants themselves can also be argued to be part of the MPEG-2 standard, whose
+ * patents expired worldwide in Feb 2020.
+ */
+
+#pragma once
+#include <cstdint>
+
+#ifdef _MSC_VER
+#define VLC_ALIGNED16 __declspec(align(16))
+#else
+#define VLC_ALIGNED16 __attribute__((aligned(16)))
+#endif
+
+enum macroblock_modes // single-bit flags: OR-ed together in the MB_* tables, tested with & on decoder.macroblock_modes
+{
+	MACROBLOCK_INTRA = 1,
+	MACROBLOCK_PATTERN = 2,
+	MACROBLOCK_MOTION_BACKWARD = 4,
+	MACROBLOCK_MOTION_FORWARD = 8,
+	MACROBLOCK_QUANT = 16,
+	DCT_TYPE_INTERLACED = 32
+};
+
+enum motion_type // two-bit field stored directly above the macroblock_modes flags (bits 6-7)
+{
+	MOTION_TYPE_SHIFT = 6, // bit position of the field
+	MOTION_TYPE_MASK = (3 * 64), // 0xC0: both bits of the field
+	MOTION_TYPE_BASE = 64, // value of the field's lowest bit
+	MC_FIELD = (1 * 64),
+	MC_FRAME = (2 * 64),
+	MC_16X8 = (2 * 64), // same encoding as MC_FRAME
+	MC_DMV = (3 * 64)
+};
+
+/* picture structure */
+enum picture_structure
+{
+	TOP_FIELD = 1,
+	BOTTOM_FIELD = 2,
+	FRAME_PICTURE = 3
+};
+
+/* picture coding type */
+enum picture_coding_type
+{
+	I_TYPE = 1,
+	P_TYPE = 2,
+	B_TYPE = 3,
+	D_TYPE = 4
+};
+
+struct MBtab
+{
+	std::uint8_t modes;
+	std::uint8_t len;
+};
+
+struct MVtab // entry of the MV_4 / MV_10 motion-code lookup tables
+{
+	std::uint8_t delta; // the VDEC motion-code path uses delta + 1 as the magnitude before applying the sign
+	std::uint8_t len; // passed to DUMPBITS after the lookup; also returned in bits 16 and up of the VDEC result
+};
+
+struct DMVtab // entry of the DMV_2 lookup table, indexed by the next 2 bits
+{
+	std::int8_t dmv; // signed value (-1, 0 or 1 in DMV_2)
+	std::uint8_t len; // passed to DUMPBITS after the lookup
+};
+
+struct CBPtab // entry of the CBP_7 / CBP_9 coded-block-pattern lookup tables
+{
+	std::uint8_t cbp; // stored into decoder.coded_block_pattern: one bit per block, 0x20 down to 0x01
+	std::uint8_t len; // passed to DUMPBITS after the lookup
+};
+
+struct DCtab
+{
+	std::uint8_t size;
+	std::uint8_t len;
+};
+
+struct DCTtab
+{
+	std::uint8_t run;
+	std::uint8_t level;
+	std::uint8_t len;
+};
+
+struct MBAtab // entry of the MBA.mba5 / MBA.mba11 lookup tables
+{
+	std::uint8_t mba; // added to mbaCount by the IDEC slice decoder
+	std::uint8_t len; // passed to DUMPBITS after the lookup
+};
+
+
+#define INTRA MACROBLOCK_INTRA
+#define QUANT MACROBLOCK_QUANT
+
+static constexpr MBtab MB_I[] = {
+	{INTRA | QUANT, 2}, {INTRA, 1}};
+
+#define MC MACROBLOCK_MOTION_FORWARD
+#define CODED MACROBLOCK_PATTERN
+
+static constexpr VLC_ALIGNED16 MBtab MB_P[] = {
+	{INTRA | QUANT, 6}, {CODED | QUANT, 5}, {MC | CODED | QUANT, 5}, {INTRA, 5},
+	{MC, 3}, {MC, 3}, {MC, 3}, {MC, 3},
+	{CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2},
+	{CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2},
+	{MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1},
+	{MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1},
+	{MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1},
+	{MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}, {MC | CODED, 1}};
+
+#define FWD MACROBLOCK_MOTION_FORWARD
+#define BWD MACROBLOCK_MOTION_BACKWARD
+#define INTER (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD) // both directions; parenthesized so the expansion stays a single operand next to any operator
+
+static constexpr VLC_ALIGNED16 MBtab MB_B[] = {
+	{0, 0}, {INTRA | QUANT, 6},
+	{BWD | CODED | QUANT, 6}, {FWD | CODED | QUANT, 6},
+	{INTER | CODED | QUANT, 5}, {INTER | CODED | QUANT, 5},
+	{INTRA, 5}, {INTRA, 5},
+	{FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4},
+	{FWD | CODED, 4}, {FWD | CODED, 4}, {FWD | CODED, 4}, {FWD | CODED, 4},
+	{BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3},
+	{BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3},
+	{BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3},
+	{BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3}, {BWD | CODED, 3},
+	{INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+	{INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+	{INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+	{INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+	{INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2},
+	{INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2},
+	{INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2},
+	{INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}, {INTER | CODED, 2}};
+
+#undef INTRA
+#undef QUANT
+#undef MC
+#undef CODED
+#undef FWD
+#undef BWD
+#undef INTER
+
+
+static constexpr MVtab MV_4[] = {
+	{3, 6}, {2, 4}, {1, 3}, {1, 3}, {0, 2}, {0, 2}, {0, 2}, {0, 2}};
+
+static constexpr VLC_ALIGNED16 MVtab MV_10[] = {
+	{0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10}, {0, 10},
+	{0, 10}, {0, 10}, {0, 10}, {0, 10}, {15, 10}, {14, 10}, {13, 10}, {12, 10},
+	{11, 10}, {10, 10}, {9, 9}, {9, 9}, {8, 9}, {8, 9}, {7, 9}, {7, 9},
+	{6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7}, {6, 7},
+	{5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7}, {5, 7},
+	{4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}, {4, 7}};
+
+
+static constexpr DMVtab DMV_2[] = {
+	{0, 1}, {0, 1}, {1, 2}, {-1, 2}};
+
+
+static constexpr VLC_ALIGNED16 CBPtab CBP_7[] = {
+	{0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
+	{0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
+	{0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
+	{0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
+	{0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
+	{0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
+	{0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
+	{0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
+	{0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
+	{0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
+	{0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
+	{0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
+	{0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
+	{0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
+	{0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
+	{0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
+	{0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+	{0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+	{0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+	{0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+	{0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+	{0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+	{0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+	{0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+	{0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+	{0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+	{0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+	{0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}};
+
+static constexpr VLC_ALIGNED16 CBPtab CBP_9[] = {
+	{0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
+	{0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
+	{0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
+	{0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
+	{0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
+	{0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
+	{0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
+	{0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
+	{0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
+	{0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
+	{0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
+	{0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
+	{0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
+	{0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
+	{0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
+	{0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}};
+
+struct MBAtabSet
+{
+	MBAtab mba5[30];
+	MBAtab mba11[26 * 4];
+};
+static constexpr VLC_ALIGNED16 MBAtabSet MBA = {
+	{// mba5
+		{6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
+		{2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
+		{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
+		{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}},
+
+	{// mba11
+		{32, 11}, {31, 11}, {30, 11}, {29, 11},
+		{28, 11}, {27, 11}, {26, 11}, {25, 11},
+		{24, 11}, {23, 11}, {22, 11}, {21, 11},
+		{20, 10}, {20, 10}, {19, 10}, {19, 10},
+		{18, 10}, {18, 10}, {17, 10}, {17, 10},
+		{16, 10}, {16, 10}, {15, 10}, {15, 10},
+		{14, 8}, {14, 8}, {14, 8}, {14, 8},
+		{14, 8}, {14, 8}, {14, 8}, {14, 8},
+		{13, 8}, {13, 8}, {13, 8}, {13, 8},
+		{13, 8}, {13, 8}, {13, 8}, {13, 8},
+		{12, 8}, {12, 8}, {12, 8}, {12, 8},
+		{12, 8}, {12, 8}, {12, 8}, {12, 8},
+		{11, 8}, {11, 8}, {11, 8}, {11, 8},
+		{11, 8}, {11, 8}, {11, 8}, {11, 8},
+		{10, 8}, {10, 8}, {10, 8}, {10, 8},
+		{10, 8}, {10, 8}, {10, 8}, {10, 8},
+		{9, 8}, {9, 8}, {9, 8}, {9, 8},
+		{9, 8}, {9, 8}, {9, 8}, {9, 8},
+		{8, 7}, {8, 7}, {8, 7}, {8, 7},
+		{8, 7}, {8, 7}, {8, 7}, {8, 7},
+		{8, 7}, {8, 7}, {8, 7}, {8, 7},
+		{8, 7}, {8, 7}, {8, 7}, {8, 7},
+		{7, 7}, {7, 7}, {7, 7}, {7, 7},
+		{7, 7}, {7, 7}, {7, 7}, {7, 7},
+		{7, 7}, {7, 7}, {7, 7}, {7, 7},
+		{7, 7}, {7, 7}, {7, 7}, {7, 7}}};
+
+struct DCtabSet
+{
+	DCtab lum0[32]; // Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110
+	DCtab lum1[16]; // Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111
+	DCtab chrom0[32]; // Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110
+	DCtab chrom1[32]; // Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111
+};
+
+static constexpr VLC_ALIGNED16 DCtabSet DCtable = // initializers follow DCtabSet member order; each entry is {size, len}
+	{
+		/* lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
+		{{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+			{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+			{0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+			{4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0}},
+
+		/* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
+		{{7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
+			{8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}},
+
+		/* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
+		{{0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+			{1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+			{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+			{3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0}},
+
+		/* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
+		{{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+			{6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
+			{7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
+			{8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10, 10}, {11, 10}},
+};
+
+struct DCTtabSet // DCT coefficient lookup tables; each entry is {run, level, len}
+{
+	DCTtab first[12]; // Table B-14, codes 0100 ... 1xxx, used for the first (DC) coefficient
+	DCTtab next[12]; // Table B-14, codes 0100 ... 1xxx, used for all other coefficients
+
+	DCTtab tab0[60]; // Table B-14, codes 000001xx ... 00111xxx
+	DCTtab tab0a[252]; // Table B-15, codes 000001xx ... 11111111
+	DCTtab tab1[8]; // Table B-14, codes 0000001000 ... 0000001111
+	DCTtab tab1a[8]; // Table B-15, codes 000000100x ... 000000111x
+
+	DCTtab tab2[16]; // Table B-14/15, 12-bit codes 000000010000 ... 000000011111
+	DCTtab tab3[16]; // Table B-14/15, 13-bit codes
+	DCTtab tab4[16]; // Table B-14/15, 14-bit codes
+	DCTtab tab5[16]; // Table B-14/15, 15-bit codes
+	DCTtab tab6[16]; // Table B-14/15, 16-bit codes
+};
+
+static constexpr VLC_ALIGNED16 DCTtabSet DCT =
+	{
+		/* first[12]: Table B-14, DCT coefficients table zero,
+	 * codes 0100 ... 1xxx (used for first (DC) coefficient)
+	 */
+		{{0, 2, 4}, {2, 1, 4}, {1, 1, 3}, {1, 1, 3},
+			{0, 1, 1}, {0, 1, 1}, {0, 1, 1}, {0, 1, 1},
+			{0, 1, 1}, {0, 1, 1}, {0, 1, 1}, {0, 1, 1}},
+
+		/* next[12]: Table B-14, DCT coefficients table zero,
+	 * codes 0100 ... 1xxx (used for all other coefficients)
+	 */
+		{{0, 2, 4}, {2, 1, 4}, {1, 1, 3}, {1, 1, 3},
+			{64, 0, 2}, {64, 0, 2}, {64, 0, 2}, {64, 0, 2}, /* EOB */
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}},
+
+		/* tab0[60]: Table B-14, DCT coefficients table zero,
+	 * codes 000001xx ... 00111xxx
+	 */
+		{{65, 0, 6}, {65, 0, 6}, {65, 0, 6}, {65, 0, 6}, /* Escape */
+			{2, 2, 7}, {2, 2, 7}, {9, 1, 7}, {9, 1, 7},
+			{0, 4, 7}, {0, 4, 7}, {8, 1, 7}, {8, 1, 7},
+			{7, 1, 6}, {7, 1, 6}, {7, 1, 6}, {7, 1, 6},
+			{6, 1, 6}, {6, 1, 6}, {6, 1, 6}, {6, 1, 6},
+			{1, 2, 6}, {1, 2, 6}, {1, 2, 6}, {1, 2, 6},
+			{5, 1, 6}, {5, 1, 6}, {5, 1, 6}, {5, 1, 6},
+			{13, 1, 8}, {0, 6, 8}, {12, 1, 8}, {11, 1, 8},
+			{3, 2, 8}, {1, 3, 8}, {0, 5, 8}, {10, 1, 8},
+			{0, 3, 5}, {0, 3, 5}, {0, 3, 5}, {0, 3, 5},
+			{0, 3, 5}, {0, 3, 5}, {0, 3, 5}, {0, 3, 5},
+			{4, 1, 5}, {4, 1, 5}, {4, 1, 5}, {4, 1, 5},
+			{4, 1, 5}, {4, 1, 5}, {4, 1, 5}, {4, 1, 5},
+			{3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5},
+			{3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5}},
+
+		/* tab0a[252]: Table B-15, DCT coefficients table one,
+	 * codes 000001xx ... 11111111
+	 */
+		{{65, 0, 6}, {65, 0, 6}, {65, 0, 6}, {65, 0, 6}, /* Escape */
+			{7, 1, 7}, {7, 1, 7}, {8, 1, 7}, {8, 1, 7},
+			{6, 1, 7}, {6, 1, 7}, {2, 2, 7}, {2, 2, 7},
+			{0, 7, 6}, {0, 7, 6}, {0, 7, 6}, {0, 7, 6},
+			{0, 6, 6}, {0, 6, 6}, {0, 6, 6}, {0, 6, 6},
+			{4, 1, 6}, {4, 1, 6}, {4, 1, 6}, {4, 1, 6},
+			{5, 1, 6}, {5, 1, 6}, {5, 1, 6}, {5, 1, 6},
+			{1, 5, 8}, {11, 1, 8}, {0, 11, 8}, {0, 10, 8},
+			{13, 1, 8}, {12, 1, 8}, {3, 2, 8}, {1, 4, 8},
+			{2, 1, 5}, {2, 1, 5}, {2, 1, 5}, {2, 1, 5},
+			{2, 1, 5}, {2, 1, 5}, {2, 1, 5}, {2, 1, 5},
+			{1, 2, 5}, {1, 2, 5}, {1, 2, 5}, {1, 2, 5},
+			{1, 2, 5}, {1, 2, 5}, {1, 2, 5}, {1, 2, 5},
+			{3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5},
+			{3, 1, 5}, {3, 1, 5}, {3, 1, 5}, {3, 1, 5},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{1, 1, 3}, {1, 1, 3}, {1, 1, 3}, {1, 1, 3},
+			{64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4}, /* EOB */
+			{64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4},
+			{64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4},
+			{64, 0, 4}, {64, 0, 4}, {64, 0, 4}, {64, 0, 4},
+			{0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4},
+			{0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4},
+			{0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4},
+			{0, 3, 4}, {0, 3, 4}, {0, 3, 4}, {0, 3, 4},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 2, 3}, {0, 2, 3}, {0, 2, 3}, {0, 2, 3},
+			{0, 4, 5}, {0, 4, 5}, {0, 4, 5}, {0, 4, 5},
+			{0, 4, 5}, {0, 4, 5}, {0, 4, 5}, {0, 4, 5},
+			{0, 5, 5}, {0, 5, 5}, {0, 5, 5}, {0, 5, 5},
+			{0, 5, 5}, {0, 5, 5}, {0, 5, 5}, {0, 5, 5},
+			{9, 1, 7}, {9, 1, 7}, {1, 3, 7}, {1, 3, 7},
+			{10, 1, 7}, {10, 1, 7}, {0, 8, 7}, {0, 8, 7},
+			{0, 9, 7}, {0, 9, 7}, {0, 12, 8}, {0, 13, 8},
+			{2, 3, 8}, {4, 2, 8}, {0, 14, 8}, {0, 15, 8}},
+
+		/* Table B-14, DCT coefficients table zero,
+	 * codes 0000001000 ... 0000001111
+	 */
+		{{16, 1, 10}, {5, 2, 10}, {0, 7, 10}, {2, 3, 10},
+			{1, 4, 10}, {15, 1, 10}, {14, 1, 10}, {4, 2, 10}},
+
+		/* Table B-15, DCT coefficients table one,
+	 * codes 000000100x ... 000000111x
+	 */
+		{{5, 2, 9}, {5, 2, 9}, {14, 1, 9}, {14, 1, 9},
+			{2, 4, 10}, {16, 1, 10}, {15, 1, 9}, {15, 1, 9}},
+
+		/* Table B-14/15, DCT coefficients table zero / one,
+	 * codes 000000010000 ... 000000011111
+	 */
+		{{0, 11, 12}, {8, 2, 12}, {4, 3, 12}, {0, 10, 12},
+			{2, 4, 12}, {7, 2, 12}, {21, 1, 12}, {20, 1, 12},
+			{0, 9, 12}, {19, 1, 12}, {18, 1, 12}, {1, 5, 12},
+			{3, 3, 12}, {0, 8, 12}, {6, 2, 12}, {17, 1, 12}},
+
+		/* Table B-14/15, DCT coefficients table zero / one,
+	 * codes 0000000010000 ... 0000000011111
+	 */
+		{{10, 2, 13}, {9, 2, 13}, {5, 3, 13}, {3, 4, 13},
+			{2, 5, 13}, {1, 7, 13}, {1, 6, 13}, {0, 15, 13},
+			{0, 14, 13}, {0, 13, 13}, {0, 12, 13}, {26, 1, 13},
+			{25, 1, 13}, {24, 1, 13}, {23, 1, 13}, {22, 1, 13}},
+
+		/* Table B-14/15, DCT coefficients table zero / one,
+	 * codes 00000000010000 ... 00000000011111
+	 */
+		{{0, 31, 14}, {0, 30, 14}, {0, 29, 14}, {0, 28, 14},
+			{0, 27, 14}, {0, 26, 14}, {0, 25, 14}, {0, 24, 14},
+			{0, 23, 14}, {0, 22, 14}, {0, 21, 14}, {0, 20, 14},
+			{0, 19, 14}, {0, 18, 14}, {0, 17, 14}, {0, 16, 14}},
+
+		/* Table B-14/15, DCT coefficients table zero / one,
+	 * codes 000000000010000 ... 000000000011111
+	 */
+		{{0, 40, 15}, {0, 39, 15}, {0, 38, 15}, {0, 37, 15},
+			{0, 36, 15}, {0, 35, 15}, {0, 34, 15}, {0, 33, 15},
+			{0, 32, 15}, {1, 14, 15}, {1, 13, 15}, {1, 12, 15},
+			{1, 11, 15}, {1, 10, 15}, {1, 9, 15}, {1, 8, 15}},
+
+		/* Table B-14/15, DCT coefficients table zero / one,
+	 * codes 0000000000010000 ... 0000000000011111
+	 */
+		{{1, 18, 16}, {1, 17, 16}, {1, 16, 16}, {1, 15, 16},
+			{6, 3, 16}, {16, 2, 16}, {15, 2, 16}, {14, 2, 16},
+			{13, 2, 16}, {12, 2, 16}, {11, 2, 16}, {31, 1, 16},
+			{30, 1, 16}, {29, 1, 16}, {28, 1, 16}, {27, 1, 16}}
+
+};
+
+#undef VLC_ALIGNED16
\ No newline at end of file
diff --git a/pcsx2/IPU/mpeg2lib/Idct.cpp b/pcsx2/IPU/mpeg2lib/Idct.cpp
deleted file mode 100644
index dd1684b989..0000000000
--- a/pcsx2/IPU/mpeg2lib/Idct.cpp
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * idct.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- * Modified by Florin for PCSX2 emu
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
- */
-
-// [TODO] : There are modern SSE versions of idct (idct_mmx.c) in the mpeg2 libs that we
-// should probably upgrade to.  They use their own raw-style intrinsics and not the intel
-// compiler-integrated ones.
-
-#include "PrecompiledHeader.h"
-
-#include "Common.h"
-#include "IPU/IPU.h"
-#include "Mpeg.h"
-
-#include <array>
-
-#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
-#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
-#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
-#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
-#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
-#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
-
-/*
- * In legal streams, the IDCT output should be between -384 and +384.
- * In corrupted streams, it is possible to force the IDCT output to go
- * to +-3826 - this is the worst case for a column IDCT where the
- * column inputs are 16-bit values.
- */
-alignas(16) extern const std::array<u8, 1024> g_idct_clip_lut;
-
-#define CLIP(i) ((g_idct_clip_lut.data()+384)[(i)])
-
-MULTI_ISA_UNSHARED_START
-
-static __fi void BUTTERFLY(int& t0, int& t1, int w0, int w1, int d0, int d1)
-{
-#if 0
-    t0 = w0*d0 + w1*d1;
-    t1 = w0*d1 - w1*d0;
-#else
-    int tmp = w0 * (d0 + d1);
-    t0 = tmp + (w1 - w0) * d1;
-    t1 = tmp - (w1 + w0) * d0;
-#endif
-}
-
-static __fi void idct_row (s16 * const block)
-{
-    int d0, d1, d2, d3;
-    int a0, a1, a2, a3, b0, b1, b2, b3;
-    int t0, t1, t2, t3;
-
-    /* shortcut */
-    if (!(block[1] | ((s32 *)block)[1] | ((s32 *)block)[2] |
-		  ((s32 *)block)[3])) {
-		u32 tmp = (u16) (block[0] << 3);
-		tmp |= tmp << 16;
-		((s32 *)block)[0] = tmp;
-		((s32 *)block)[1] = tmp;
-		((s32 *)block)[2] = tmp;
-		((s32 *)block)[3] = tmp;
-		return;
-    }
-
-    d0 = (block[0] << 11) + 128;
-    d1 = block[1];
-    d2 = block[2] << 11;
-    d3 = block[3];
-    t0 = d0 + d2;
-    t1 = d0 - d2;
-    BUTTERFLY (t2, t3, W6, W2, d3, d1);
-    a0 = t0 + t2;
-    a1 = t1 + t3;
-    a2 = t1 - t3;
-    a3 = t0 - t2;
-
-    d0 = block[4];
-    d1 = block[5];
-    d2 = block[6];
-    d3 = block[7];
-    BUTTERFLY (t0, t1, W7, W1, d3, d0);
-    BUTTERFLY (t2, t3, W3, W5, d1, d2);
-    b0 = t0 + t2;
-    b3 = t1 + t3;
-    t0 -= t2;
-    t1 -= t3;
-    b1 = ((t0 + t1) * 181) >> 8;
-    b2 = ((t0 - t1) * 181) >> 8;
-
-    block[0] = (a0 + b0) >> 8;
-    block[1] = (a1 + b1) >> 8;
-    block[2] = (a2 + b2) >> 8;
-    block[3] = (a3 + b3) >> 8;
-    block[4] = (a3 - b3) >> 8;
-    block[5] = (a2 - b2) >> 8;
-    block[6] = (a1 - b1) >> 8;
-    block[7] = (a0 - b0) >> 8;
-}
-
-static __fi void idct_col (s16 * const block)
-{
-    int d0, d1, d2, d3;
-    int a0, a1, a2, a3, b0, b1, b2, b3;
-    int t0, t1, t2, t3;
-
-    d0 = (block[8*0] << 11) + 65536;
-    d1 = block[8*1];
-    d2 = block[8*2] << 11;
-    d3 = block[8*3];
-    t0 = d0 + d2;
-    t1 = d0 - d2;
-    BUTTERFLY (t2, t3, W6, W2, d3, d1);
-    a0 = t0 + t2;
-    a1 = t1 + t3;
-    a2 = t1 - t3;
-    a3 = t0 - t2;
-
-    d0 = block[8*4];
-    d1 = block[8*5];
-    d2 = block[8*6];
-    d3 = block[8*7];
-    BUTTERFLY (t0, t1, W7, W1, d3, d0);
-    BUTTERFLY (t2, t3, W3, W5, d1, d2);
-    b0 = t0 + t2;
-    b3 = t1 + t3;
-    t0 = (t0 - t2) >> 8;
-    t1 = (t1 - t3) >> 8;
-    b1 = (t0 + t1) * 181;
-    b2 = (t0 - t1) * 181;
-
-    block[8*0] = (a0 + b0) >> 17;
-    block[8*1] = (a1 + b1) >> 17;
-    block[8*2] = (a2 + b2) >> 17;
-    block[8*3] = (a3 + b3) >> 17;
-    block[8*4] = (a3 - b3) >> 17;
-    block[8*5] = (a2 - b2) >> 17;
-    block[8*6] = (a1 - b1) >> 17;
-    block[8*7] = (a0 - b0) >> 17;
-}
-
-__ri void mpeg2_idct_copy(s16 * block, u8 * dest, const int stride)
-{
-    int i;
-
-    for (i = 0; i < 8; i++)
-		idct_row (block + 8 * i);
-    for (i = 0; i < 8; i++)
-		idct_col (block + i);
-
-	__m128 zero = _mm_setzero_ps();
-    do {
-		dest[0] = CLIP (block[0]);
-		dest[1] = CLIP (block[1]);
-		dest[2] = CLIP (block[2]);
-		dest[3] = CLIP (block[3]);
-		dest[4] = CLIP (block[4]);
-		dest[5] = CLIP (block[5]);
-		dest[6] = CLIP (block[6]);
-		dest[7] = CLIP (block[7]);
-
-		_mm_store_ps((float*)block, zero);
-
-		dest += stride;
-		block += 8;
-    } while (--i);
-}
-
-
-// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]).
-__ri void mpeg2_idct_add (const int last, s16 * block, s16 * dest, const int stride)
-{
-	// on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0).
-
-    if (last != 129 || (block[0] & 7) == 4)
-    {
-		int i;
-		for (i = 0; i < 8; i++)
-			idct_row (block + 8 * i);
-		for (i = 0; i < 8; i++)
-			idct_col (block + i);
-
-		__m128 zero = _mm_setzero_ps();
-		do {
-			_mm_store_ps((float*)dest, _mm_load_ps((float*)block));
-			_mm_store_ps((float*)block, zero);
-
-			dest += stride;
-			block += 8;
-		} while (--i);
-
-    }
-    else
-    {
-		s16 DC = ((int)block[0] + 4) >> 3;
-		s16 dcf[2] = { DC, DC };
-		block[0] = block[63] = 0;
-
-		__m128 dc128 = _mm_set_ps1(*(float*)dcf);
-
-		for(int i=0; i<8; ++i)
-			_mm_store_ps((float*)(dest+(stride*i)), dc128);
-    }
-}
-
-MULTI_ISA_UNSHARED_END
-
-#if MULTI_ISA_COMPILE_ONCE
-
-static constexpr std::array<u8, 1024> make_clip_lut()
-{
-	std::array<u8, 1024> lut = {};
-	for (int i = -384; i < 640; i++)
-		lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
-	return lut;
-}
-
-static constexpr mpeg2_scan_pack make_scan_pack()
-{
-	constexpr u8 mpeg2_scan_norm[64] = {
-		/* Zig-Zag scan pattern */
-		0,  1,  8,  16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
-		12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
-		35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
-		58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
-	};
-
-	constexpr u8 mpeg2_scan_alt[64] = {
-		/* Alternate scan pattern */
-		0,  8,  16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
-		41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
-		51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
-		53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
-	};
-
-	mpeg2_scan_pack pack = {};
-
-	for (int i = 0; i < 64; i++) {
-		int j = mpeg2_scan_norm[i];
-		pack.norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
-		j = mpeg2_scan_alt[i];
-		pack.alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
-	}
-
-	return pack;
-}
-
-alignas(16) constexpr std::array<u8, 1024> g_idct_clip_lut = make_clip_lut();
-alignas(16) constexpr mpeg2_scan_pack mpeg2_scan = make_scan_pack();
-
-#endif
diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.cpp b/pcsx2/IPU/mpeg2lib/Mpeg.cpp
deleted file mode 100644
index 664f69c7a0..0000000000
--- a/pcsx2/IPU/mpeg2lib/Mpeg.cpp
+++ /dev/null
@@ -1,1285 +0,0 @@
-/*
- * Mpeg.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- * Modified by Florin for PCSX2 emu
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
- */
-
-// [Air] Note: many functions in this module are large and only used once, so they
-//	have been forced to inline since it won't bloat the program and gets rid of
-//	some call overhead.
-
-#include "PrecompiledHeader.h"
-
-#include "Common.h"
-#include "IPU/IPU.h"
-#include "Mpeg.h"
-#include "Vlc.h"
-
-#include "GS/MultiISA.h"
-
-#include "common/MemsetFast.inl"
-
-#if MULTI_ISA_COMPILE_ONCE
-
-const int non_linear_quantizer_scale [] =
-{
-	0,  1,  2,  3,  4,  5,	6,	7,
-	8, 10, 12, 14, 16, 18,  20,  22,
-	24, 28, 32, 36, 40, 44,  48,  52,
-	56, 64, 72, 80, 88, 96, 104, 112
-};
-
-#endif
-
-MULTI_ISA_UNSHARED_START
-
-/* Bitstream and buffer needs to be reallocated in order for successful
-	reading of the old data. Here the old data stored in the 2nd slot
-	of the internal buffer is copied to 1st slot, and the new data read
-	into 1st slot is copied to the 2nd slot. Which will later be copied
-	back to the 1st slot when 128bits have been read.
-*/
-const DCTtab * tab;
-int mbaCount = 0;
-
-int bitstream_init ()
-{
-	return g_BP.FillBuffer(32);
-}
-
-int get_macroblock_modes()
-{
-	int macroblock_modes;
-	const MBtab * tab;
-
-	switch (decoder.coding_type)
-	{
-		case I_TYPE:
-			macroblock_modes = UBITS(2);
-
-			if (macroblock_modes == 0) return 0;   // error
-
-			tab = MB_I + (macroblock_modes >> 1);
-			DUMPBITS(tab->len);
-			macroblock_modes = tab->modes;
-
-			if ((!(decoder.frame_pred_frame_dct)) &&
-				(decoder.picture_structure == FRAME_PICTURE))
-			{
-				macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
-			}
-			return macroblock_modes;
-
-		case P_TYPE:
-			macroblock_modes = UBITS(6);
-
-			if (macroblock_modes == 0) return 0;   // error
-
-			tab = MB_P + (macroblock_modes >> 1);
-			DUMPBITS(tab->len);
-			macroblock_modes = tab->modes;
-
-			if (decoder.picture_structure != FRAME_PICTURE)
-			{
-				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-				{
-					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
-				}
-
-				return macroblock_modes;
-			}
-			else if (decoder.frame_pred_frame_dct)
-			{
-				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-					macroblock_modes |= MC_FRAME;
-
-				return macroblock_modes;
-			}
-			else
-			{
-				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-				{
-					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
-				}
-
-				if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
-				{
-					macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
-				}
-
-				return macroblock_modes;
-			}
-
-		case B_TYPE:
-			macroblock_modes = UBITS(6);
-
-			if (macroblock_modes == 0) return 0;   // error
-
-			tab = MB_B + macroblock_modes;
-			DUMPBITS(tab->len);
-			macroblock_modes = tab->modes;
-
-			if (decoder.picture_structure != FRAME_PICTURE)
-			{
-				if (!(macroblock_modes & MACROBLOCK_INTRA))
-				{
-					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
-				}
-				return (macroblock_modes | (tab->len << 16));
-			}
-			else if (decoder.frame_pred_frame_dct)
-			{
-				/* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
-				macroblock_modes |= MC_FRAME;
-				return (macroblock_modes | (tab->len << 16));
-			}
-			else
-			{
-				if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
-
-				macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
-
-				if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
-				{
-intra:
-					macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
-				}
-				return (macroblock_modes | (tab->len << 16));
-			}
-
-		case D_TYPE:
-			macroblock_modes = GETBITS(1);
-			//I suspect (as this is actually a 2 bit command) that this should be getbits(2)
-			//additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction)
-			DevCon.Warning(" Rare MPEG command! ");
-			if (macroblock_modes == 0) return 0;   // error
-			return (MACROBLOCK_INTRA | (1 << 16));
-
-		default:
-			return 0;
-	}
-}
-
-static __fi int get_quantizer_scale()
-{
-	int quantizer_scale_code;
-
-	quantizer_scale_code = GETBITS(5);
-
-	if (decoder.q_scale_type)
-		return non_linear_quantizer_scale [quantizer_scale_code];
-	else
-		return quantizer_scale_code << 1;
-}
-
-static __fi int get_coded_block_pattern()
-{
-	const CBPtab * tab;
-	u16 code = UBITS(16);
-
-	if (code >= 0x2000)
-		tab = CBP_7 + (UBITS(7) - 16);
-	else
-		tab = CBP_9 + UBITS(9);
-
-	DUMPBITS(tab->len);
-	return tab->cbp;
-}
-
-int __fi get_motion_delta(const int f_code)
-{
-	int delta;
-	int sign;
-	const MVtab * tab;
-	u16 code = UBITS(16);
-
-	if ((code & 0x8000))
-	{
-		DUMPBITS(1);
-		return 0x00010000;
-	}
-	else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
-	{
-		tab = MV_4 + UBITS(4);
-	}
-	else
-	{
-		tab = MV_10 + UBITS(10);
-	}
-
-	delta = tab->delta + 1;
-	DUMPBITS(tab->len);
-
-	sign = SBITS(1);
-	DUMPBITS(1);
-
-	return (((delta ^ sign) - sign) | (tab->len << 16));
-}
-
-int __fi get_dmv()
-{
-	const DMVtab* tab = DMV_2 + UBITS(2);
-	DUMPBITS(tab->len);
-	return (tab->dmv | (tab->len << 16));
-}
-
-int get_macroblock_address_increment()
-{
-	const MBAtab *mba;
-
-	u16 code = UBITS(16);
-
-	if (code >= 4096)
-		mba = MBA.mba5 + (UBITS(5) - 2);
-	else if (code >= 768)
-		mba = MBA.mba11 + (UBITS(11) - 24);
-	else switch (UBITS(11))
-	{
-		case 8:		/* macroblock_escape */
-			DUMPBITS(11);
-			return 0xb0023;
-
-		case 15:	/* macroblock_stuffing (MPEG1 only) */
-			if (decoder.mpeg1)
-			{
-				DUMPBITS(11);
-				return 0xb0022;
-			}
-			[[fallthrough]];
-
-		default:
-			return 0;//error
-	}
-
-	DUMPBITS(mba->len);
-
-	return ((mba->mba + 1) | (mba->len << 16));
-}
-
-static __fi int get_luma_dc_dct_diff()
-{
-	int size;
-	int dc_diff;
-	u16 code = UBITS(5);
-
-	if (code < 31)
-	{
-		size = DCtable.lum0[code].size;
-		DUMPBITS(DCtable.lum0[code].len);
-
-		// 5 bits max
-	}
-	else
-	{
-		code = UBITS(9) - 0x1f0;
-		size = DCtable.lum1[code].size;
-		DUMPBITS(DCtable.lum1[code].len);
-
-		// 9 bits max
-	}
-
-	if (size==0)
-		dc_diff = 0;
-	else
-	{
-		dc_diff = GETBITS(size);
-
-		// 6 for tab0 and 11 for tab1
-		if ((dc_diff & (1<<(size-1)))==0)
-		  dc_diff-= (1<<size) - 1;
-	}
-
-	return dc_diff;
-}
-
-static __fi int get_chroma_dc_dct_diff()
-{
-	int size;
-	int dc_diff;
-	u16 code = UBITS(5);
-
-	if (code<31)
-	{
-		size = DCtable.chrom0[code].size;
-		DUMPBITS(DCtable.chrom0[code].len);
-	}
-	else
-	{
-		code = UBITS(10) - 0x3e0;
-		size = DCtable.chrom1[code].size;
-		DUMPBITS(DCtable.chrom1[code].len);
-	}
-
-	if (size==0)
-		dc_diff = 0;
-	else
-	{
-		dc_diff = GETBITS(size);
-
-		if ((dc_diff & (1<<(size-1)))==0)
-		{
-			dc_diff-= (1<<size) - 1;
-		}
-	}
-
-	return dc_diff;
-}
-
-static __fi void SATURATE(int& val)
-{
-	if ((u32)(val + 2048) > 4095)
-		val = (val >> 31) ^ 2047;
-}
-
-static bool get_intra_block()
-{
-	const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
-	const u8 (&quant_matrix)[64] = decoder.iq;
-	int quantizer_scale = decoder.quantizer_scale;
-	s16 * dest = decoder.DCTblock;
-	u16 code;
-
-	/* decode AC coefficients */
-  for (int i=1 + ipu_cmd.pos[4]; ; i++)
-  {
-	  switch (ipu_cmd.pos[5])
-	  {
-	  case 0:
-		if (!GETWORD())
-		{
-		  ipu_cmd.pos[4] = i - 1;
-		  return false;
-		}
-
-		code = UBITS(16);
-
-		if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
-		{
-		  tab = &DCT.next[(code >> 12) - 4];
-		}
-		else if (code >= 1024)
-		{
-			if (decoder.intra_vlc_format && !decoder.mpeg1)
-			{
-				tab = &DCT.tab0a[(code >> 8) - 4];
-			}
-			else
-			{
-				tab = &DCT.tab0[(code >> 8) - 4];
-			}
-		}
-		else if (code >= 512)
-		{
-			if (decoder.intra_vlc_format && !decoder.mpeg1)
-			{
-				tab = &DCT.tab1a[(code >> 6) - 8];
-			}
-			else
-			{
-				tab = &DCT.tab1[(code >> 6) - 8];
-			}
-		}
-
-		// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
-		// that should use a single unrolled DCT table instead of five separate tables used
-		// here.  Multiple conditional statements are very slow, while modern CPU data caches
-		// have lots of room to spare.
-
-		else if (code >= 256)
-		{
-			tab = &DCT.tab2[(code >> 4) - 16];
-		}
-		else if (code >= 128)
-		{
-			tab = &DCT.tab3[(code >> 3) - 16];
-		}
-		else if (code >= 64)
-		{
-			tab = &DCT.tab4[(code >> 2) - 16];
-		}
-		else if (code >= 32)
-		{
-			tab = &DCT.tab5[(code >> 1) - 16];
-		}
-		else if (code >= 16)
-		{
-			tab = &DCT.tab6[code - 16];
-		}
-		else
-		{
-		  ipu_cmd.pos[4] = 0;
-		  return true;
-		}
-
-		DUMPBITS(tab->len);
-
-		if (tab->run==64) /* end_of_block */
-		{
-			ipu_cmd.pos[4] = 0;
-			return true;
-		}
-
-		i += (tab->run == 65) ? GETBITS(6) : tab->run;
-		if (i >= 64)
-		{
-			ipu_cmd.pos[4] = 0;
-			return true;
-		}
-		[[fallthrough]];
-
-	  case 1:
-	  {
-			if (!GETWORD())
-			{
-				ipu_cmd.pos[4] = i - 1;
-				ipu_cmd.pos[5] = 1;
-				return false;
-			}
-
-			uint j = scan[i];
-			int val;
-
-			if (tab->run==65) /* escape */
-			{
-				if(!decoder.mpeg1)
-				{
-				  val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
-				  DUMPBITS(12);
-				}
-				else
-				{
-				  val = SBITS(8);
-				  DUMPBITS(8);
-
-				  if (!(val & 0x7f))
-				  {
-					val = GETBITS(8) + 2 * val;
-				  }
-
-				  val = (val * quantizer_scale * quant_matrix[i]) >> 4;
-				  val = (val + ~ (((s32)val) >> 31)) | 1;
-				}
-			}
-			else
-			{
-				val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
-				if(decoder.mpeg1)
-				{
-					/* oddification */
-					val = (val - 1) | 1;
-				}
-
-				/* if (bitstream_get (1)) val = -val; */
-				int bit1 = SBITS(1);
-				val = (val ^ bit1) - bit1;
-				DUMPBITS(1);
-			}
-
-			SATURATE(val);
-			dest[j] = val;
-			ipu_cmd.pos[5] = 0;
-		}
-	 }
-  }
-
-  ipu_cmd.pos[4] = 0;
-  return true;
-}
-
-static bool get_non_intra_block(int * last)
-{
-	int i;
-	int j;
-	int val;
-	const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
-	const u8 (&quant_matrix)[64] = decoder.niq;
-	int quantizer_scale = decoder.quantizer_scale;
-	s16 * dest = decoder.DCTblock;
-	u16 code;
-
-	/* decode AC coefficients */
-	for (i= ipu_cmd.pos[4] ; ; i++)
-	{
-		switch (ipu_cmd.pos[5])
-		{
-		case 0:
-			if (!GETWORD())
-			{
-				ipu_cmd.pos[4] = i;
-				return false;
-			}
-
-			code = UBITS(16);
-
-			if (code >= 16384)
-			{
-				if (i==0)
-				{
-					tab = &DCT.first[(code >> 12) - 4];
-				}
-				else
-				{
-					tab = &DCT.next[(code >> 12)- 4];
-				}
-			}
-			else if (code >= 1024)
-			{
-				tab = &DCT.tab0[(code >> 8) - 4];
-			}
-			else if (code >= 512)
-			{
-				tab = &DCT.tab1[(code >> 6) - 8];
-			}
-
-			// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
-			// that should use a single unrolled DCT table instead of five separate tables used
-			// here.  Multiple conditional statements are very slow, while modern CPU data caches
-			// have lots of room to spare.
-
-			else if (code >= 256)
-			{
-				tab = &DCT.tab2[(code >> 4) - 16];
-			}
-			else if (code >= 128)
-			{
-				tab = &DCT.tab3[(code >> 3) - 16];
-			}
-			else if (code >= 64)
-			{
-				tab = &DCT.tab4[(code >> 2) - 16];
-			}
-			else if (code >= 32)
-			{
-				tab = &DCT.tab5[(code >> 1) - 16];
-			}
-			else if (code >= 16)
-			{
-				tab = &DCT.tab6[code - 16];
-			}
-			else
-			{
-				ipu_cmd.pos[4] = 0;
-				return true;
-			}
-
-			DUMPBITS(tab->len);
-
-			if (tab->run==64) /* end_of_block */
-			{
-				*last = i;
-				ipu_cmd.pos[4] = 0;
-				return true;
-			}
-
-			i += (tab->run == 65) ? GETBITS(6) : tab->run;
-			if (i >= 64)
-			{
-				*last = i;
-				ipu_cmd.pos[4] = 0;
-				return true;
-			}
-			[[fallthrough]];
-
-		case 1:
-			if (!GETWORD())
-			{
-			  ipu_cmd.pos[4] = i;
-			  ipu_cmd.pos[5] = 1;
-			  return false;
-			}
-
-			j = scan[i];
-
-			if (tab->run==65) /* escape */
-			{
-				if (!decoder.mpeg1)
-				{
-					val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
-					DUMPBITS(12);
-				}
-				else
-				{
-				  val = SBITS(8);
-				  DUMPBITS(8);
-
-				  if (!(val & 0x7f))
-				  {
-					val = GETBITS(8) + 2 * val;
-				  }
-
-				  val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
-				  val = (val + ~ (((s32)val) >> 31)) | 1;
-				}
-			}
-			else
-			{
-				int bit1 = SBITS(1);
-				val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
-				val = (val ^ bit1) - bit1;
-				DUMPBITS(1);
-			}
-
-			SATURATE(val);
-			dest[j] = val;
-			ipu_cmd.pos[5] = 0;
-		}
-	}
-
-	ipu_cmd.pos[4] = 0;
-	return true;
-}
-
-static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
-{
-	if (!skip || ipu_cmd.pos[3])
-	{
-		ipu_cmd.pos[3] = 0;
-		if (!GETWORD())
-		{
-			ipu_cmd.pos[3] = 1;
-			return false;
-		}
-
-		/* Get the intra DC coefficient and inverse quantize it */
-		if (cc == 0)
-			decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
-		else
-			decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
-
-		decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
-	}
-
-	if (!get_intra_block())
-	{
-		return false;
-	}
-
-	mpeg2_idct_copy(decoder.DCTblock, dest, stride);
-
-	return true;
-}
-
-static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
-{
-	int last;
-
-	if (!skip)
-	{
-		memzero_sse_a(decoder.DCTblock);
-	}
-
-	if (!get_non_intra_block(&last))
-	{
-		return false;
-	}
-
-	mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
-
-	return true;
-}
-
-void __fi finishmpeg2sliceIDEC()
-{
-	ipuRegs.ctrl.SCD = 0;
-	coded_block_pattern = decoder.coded_block_pattern;
-}
-
-__fi bool mpeg2sliceIDEC()
-{
-	u16 code;
-
-	switch (ipu_cmd.pos[0])
-	{
-	case 0:
-		decoder.dc_dct_pred[0] =
-		decoder.dc_dct_pred[1] =
-		decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
-
-		ipuRegs.top = 0;
-		ipuRegs.ctrl.ECD = 0;
-		[[fallthrough]];
-
-	case 1:
-		ipu_cmd.pos[0] = 1;
-		if (!bitstream_init())
-		{
-			return false;
-		}
-		[[fallthrough]];
-
-	case 2:
-		ipu_cmd.pos[0] = 2;
-		while (1)
-		{
-			// IPU0 isn't ready for data, so let's wait for it to be
-			if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[1] <= 2)
-			{
-				return false;
-			}
-			macroblock_8& mb8 = decoder.mb8;
-			macroblock_rgb16& rgb16 = decoder.rgb16;
-			macroblock_rgb32& rgb32 = decoder.rgb32;
-
-			int DCT_offset, DCT_stride;
-			const MBAtab * mba;
-
-			switch (ipu_cmd.pos[1])
-			{
-			case 0:
-				decoder.macroblock_modes = get_macroblock_modes();
-
-				if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
-				{
-					decoder.quantizer_scale = get_quantizer_scale();
-				}
-
-				decoder.coded_block_pattern = 0x3F;//all 6 blocks
-				memzero_sse_a(mb8);
-				memzero_sse_a(rgb32);
-				[[fallthrough]];
-
-			case 1:
-				ipu_cmd.pos[1] = 1;
-
-				if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
-				{
-					DCT_offset = decoder_stride;
-					DCT_stride = decoder_stride * 2;
-				}
-				else
-				{
-					DCT_offset = decoder_stride * 8;
-					DCT_stride = decoder_stride;
-				}
-
-				switch (ipu_cmd.pos[2])
-				{
-				case 0:
-				case 1:
-					if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
-					{
-						ipu_cmd.pos[2] = 1;
-						return false;
-					}
-					[[fallthrough]];
-
-				case 2:
-					if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
-					{
-						ipu_cmd.pos[2] = 2;
-						return false;
-					}
-					[[fallthrough]];
-
-				case 3:
-					if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
-					{
-						ipu_cmd.pos[2] = 3;
-						return false;
-					}
-					[[fallthrough]];
-
-				case 4:
-					if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
-					{
-						ipu_cmd.pos[2] = 4;
-						return false;
-					}
-					[[fallthrough]];
-
-				case 5:
-					if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
-					{
-						ipu_cmd.pos[2] = 5;
-						return false;
-					}
-					[[fallthrough]];
-
-				case 6:
-					if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
-					{
-						ipu_cmd.pos[2] = 6;
-						return false;
-					}
-					break;
-
-				jNO_DEFAULT;
-				}
-
-				// Send The MacroBlock via DmaIpuFrom
-				ipu_csc(mb8, rgb32, decoder.sgn);
-
-				if (decoder.ofm == 0)
-					decoder.SetOutputTo(rgb32);
-				else
-				{
-					ipu_dither(rgb32, rgb16, decoder.dte);
-					decoder.SetOutputTo(rgb16);
-				}
-				[[fallthrough]];
-
-			case 2:
-			{
-
-				pxAssert(decoder.ipu0_data > 0);
-
-				uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
-				decoder.AdvanceIpuDataBy(read);
-
-				if (decoder.ipu0_data != 0)
-				{
-					// IPU FIFO filled up -- Will have to finish transferring later.
-					ipu_cmd.pos[1] = 2;
-					return false;
-				}
-
-				mbaCount = 0;
-				if (read)
-				{
-					ipu_cmd.pos[1] = 3;
-					return false;
-				}
-			}
-				[[fallthrough]];
-
-			case 3:
-				while (1)
-				{
-					if (!GETWORD())
-					{
-						ipu_cmd.pos[1] = 3;
-						return false;
-					}
-
-					code = UBITS(16);
-					if (code >= 0x1000)
-					{
-						mba = MBA.mba5 + (UBITS(5) - 2);
-						break;
-					}
-					else if (code >= 0x0300)
-					{
-						mba = MBA.mba11 + (UBITS(11) - 24);
-						break;
-					}
-					else switch (UBITS(11))
-					{
-						case 8:		/* macroblock_escape */
-							mbaCount += 33;
-							[[fallthrough]];
-
-						case 15:	/* macroblock_stuffing (MPEG1 only) */
-							DUMPBITS(11);
-							continue;
-
-						default:	/* end of slice/frame, or error? */
-						{
-							goto finish_idec;
-						}
-					}
-				}
-
-				DUMPBITS(mba->len);
-				mbaCount += mba->mba;
-
-				if (mbaCount)
-				{
-					decoder.dc_dct_pred[0] =
-					decoder.dc_dct_pred[1] =
-					decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
-				}
-				[[fallthrough]];
-
-			case 4:
-				if (!GETWORD())
-				{
-					ipu_cmd.pos[1] = 4;
-					return false;
-				}
-				break;
-
-			jNO_DEFAULT;
-			}
-
-			ipu_cmd.pos[1] = 0;
-			ipu_cmd.pos[2] = 0;
-		}
-
-finish_idec:
-		finishmpeg2sliceIDEC();
-		[[fallthrough]];
-
-	case 3:
-	{
-		u8 bit8;
-		u32 start_check;
-		if (!getBits8((u8*)&bit8, 0))
-		{
-			ipu_cmd.pos[0] = 3;
-			return false;
-		}
-
-		if (bit8 == 0)
-		{
-			g_BP.Align();
-			do
-			{
-				if (!g_BP.FillBuffer(24))
-				{
-					ipu_cmd.pos[0] = 3;
-					return false;
-				}
-				start_check = UBITS(24);
-				if (start_check != 0)
-				{
-					if (start_check == 1)
-					{
-						ipuRegs.ctrl.SCD = 1;
-					}
-					else
-					{
-						ipuRegs.ctrl.ECD = 1;
-					}
-					break;
-				}
-				DUMPBITS(8);
-			} while (1);
-		}
-	}
-		[[fallthrough]];
-
-	case 4:
-		if (!getBits32((u8*)&ipuRegs.top, 0))
-		{
-			ipu_cmd.pos[0] = 4;
-			return false;
-		}
-
-		ipuRegs.top = BigEndian(ipuRegs.top);
-		break;
-
-	jNO_DEFAULT;
-	}
-
-	return true;
-}
-
-__fi bool mpeg2_slice()
-{
-	int DCT_offset, DCT_stride;
-
-	macroblock_8& mb8 = decoder.mb8;
-	macroblock_16& mb16 = decoder.mb16;
-
-	switch (ipu_cmd.pos[0])
-	{
-	case 0:
-		if (decoder.dcr)
-		{
-			decoder.dc_dct_pred[0] =
-			decoder.dc_dct_pred[1] =
-			decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
-		}
-
-		ipuRegs.ctrl.ECD = 0;
-		ipuRegs.top = 0;
-		memzero_sse_a(mb8);
-		memzero_sse_a(mb16);
-		[[fallthrough]];
-
-	case 1:
-		if (!bitstream_init())
-		{
-			ipu_cmd.pos[0] = 1;
-			return false;
-		}
-		[[fallthrough]];
-
-	case 2:
-		ipu_cmd.pos[0] = 2;
-
-		// IPU0 isn't ready for data, so let's wait for it to be
-		if ((!ipu0ch.chcr.STR || ipuRegs.ctrl.OFC || ipu0ch.qwc == 0) && ipu_cmd.pos[0] <= 3)
-		{
-			return false;
-		}
-
-		if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
-		{
-			DCT_offset = decoder_stride;
-			DCT_stride = decoder_stride * 2;
-		}
-		else
-		{
-			DCT_offset = decoder_stride * 8;
-			DCT_stride = decoder_stride;
-		}
-
-		if (decoder.macroblock_modes & MACROBLOCK_INTRA)
-		{
-			switch(ipu_cmd.pos[1])
-			{
-			case 0:
-				decoder.coded_block_pattern = 0x3F;
-				[[fallthrough]];
-
-			case 1:
-				if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
-				{
-					ipu_cmd.pos[1] = 1;
-					return false;
-				}
-				[[fallthrough]];
-
-			case 2:
-				if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
-				{
-					ipu_cmd.pos[1] = 2;
-					return false;
-				}
-				[[fallthrough]];
-
-			case 3:
-				if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
-				{
-					ipu_cmd.pos[1] = 3;
-					return false;
-				}
-				[[fallthrough]];
-
-			case 4:
-				if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
-				{
-					ipu_cmd.pos[1] = 4;
-					return false;
-				}
-				[[fallthrough]];
-
-			case 5:
-				if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
-				{
-					ipu_cmd.pos[1] = 5;
-					return false;
-				}
-				[[fallthrough]];
-
-			case 6:
-				if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
-				{
-					ipu_cmd.pos[1] = 6;
-					return false;
-				}
-				break;
-
-			jNO_DEFAULT;
-			}
-
-			// Copy macroblock8 to macroblock16 - without sign extension.
-			// Manually inlined due to MSVC refusing to inline the SSE-optimized version.
-			{
-				const u8	*s = (const u8*)&mb8;
-				u16			*d = (u16*)&mb16;
-
-				//Y  bias	- 16 * 16
-				//Cr bias	- 8 * 8
-				//Cb bias	- 8 * 8
-
-				__m128i zeroreg = _mm_setzero_si128();
-
-				for (uint i = 0; i < (256+64+64) / 32; ++i)
-				{
-					//*d++ = *s++;
-					__m128i woot1 = _mm_load_si128((__m128i*)s);
-					__m128i woot2 = _mm_load_si128((__m128i*)s+1);
-					_mm_store_si128((__m128i*)d,	_mm_unpacklo_epi8(woot1, zeroreg));
-					_mm_store_si128((__m128i*)d+1,	_mm_unpackhi_epi8(woot1, zeroreg));
-					_mm_store_si128((__m128i*)d+2,	_mm_unpacklo_epi8(woot2, zeroreg));
-					_mm_store_si128((__m128i*)d+3,	_mm_unpackhi_epi8(woot2, zeroreg));
-					s += 32;
-					d += 32;
-				}
-			}
-		}
-		else
-		{
-			if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
-			{
-				switch(ipu_cmd.pos[1])
-				{
-				case 0:
-					decoder.coded_block_pattern = get_coded_block_pattern();  // max 9bits
-					[[fallthrough]];
-
-				case 1:
-					if (decoder.coded_block_pattern & 0x20)
-					{
-						if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
-						{
-							ipu_cmd.pos[1] = 1;
-							return false;
-						}
-					}
-					[[fallthrough]];
-
-				case 2:
-					if (decoder.coded_block_pattern & 0x10)
-					{
-						if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
-						{
-							ipu_cmd.pos[1] = 2;
-							return false;
-						}
-					}
-					[[fallthrough]];
-
-				case 3:
-					if (decoder.coded_block_pattern & 0x08)
-					{
-						if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
-						{
-							ipu_cmd.pos[1] = 3;
-							return false;
-						}
-					}
-					[[fallthrough]];
-
-				case 4:
-					if (decoder.coded_block_pattern & 0x04)
-					{
-						if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
-						{
-							ipu_cmd.pos[1] = 4;
-							return false;
-						}
-					}
-					[[fallthrough]];
-
-				case 5:
-					if (decoder.coded_block_pattern & 0x2)
-					{
-						if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
-						{
-							ipu_cmd.pos[1] = 5;
-							return false;
-						}
-					}
-					[[fallthrough]];
-
-				case 6:
-					if (decoder.coded_block_pattern & 0x1)
-					{
-						if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
-						{
-							ipu_cmd.pos[1] = 6;
-							return false;
-						}
-					}
-					break;
-
-				jNO_DEFAULT;
-				}
-			}
-		}
-
-		// Send The MacroBlock via DmaIpuFrom
-		ipuRegs.ctrl.SCD = 0;
-		coded_block_pattern = decoder.coded_block_pattern;
-
-		decoder.SetOutputTo(mb16);
-		[[fallthrough]];
-
-	case 3:
-	{
-		pxAssert(decoder.ipu0_data > 0);
-
-		uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
-		decoder.AdvanceIpuDataBy(read);
-
-		if (decoder.ipu0_data != 0)
-		{
-			// IPU FIFO filled up -- Will have to finish transferring later.
-			ipu_cmd.pos[0] = 3;
-			return false;
-		}
-
-		mbaCount = 0;
-		if (read)
-		{
-			ipu_cmd.pos[0] = 4;
-			return false;
-		}
-	}
-		[[fallthrough]];
-
-	case 4:
-	{
-		u8 bit8;
-		u32 start_check;
-		if (!getBits8((u8*)&bit8, 0))
-		{
-			ipu_cmd.pos[0] = 4;
-			return false;
-		}
-
-		if (bit8 == 0)
-		{
-			g_BP.Align();
-			do
-			{
-				if (!g_BP.FillBuffer(24))
-				{
-					ipu_cmd.pos[0] = 4;
-					return false;
-				}
-				start_check = UBITS(24);
-				if (start_check != 0)
-				{
-					if (start_check == 1)
-					{
-						ipuRegs.ctrl.SCD = 1;
-					}
-					else
-					{
-						ipuRegs.ctrl.ECD = 1;
-					}
-					break;
-				}
-				DUMPBITS(8);
-			} while (1);
-		}
-	}
-		[[fallthrough]];
-
-	case 5:
-		if (!getBits32((u8*)&ipuRegs.top, 0))
-		{
-			ipu_cmd.pos[0] = 5;
-			return false;
-		}
-
-		ipuRegs.top = BigEndian(ipuRegs.top);
-		break;
-	}
-
-	return true;
-}
-
-MULTI_ISA_UNSHARED_END
diff --git a/pcsx2/IPU/mpeg2lib/Mpeg.h b/pcsx2/IPU/mpeg2lib/Mpeg.h
deleted file mode 100644
index d5ea1132bd..0000000000
--- a/pcsx2/IPU/mpeg2lib/Mpeg.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Mpeg.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- * Modified by Florin for PCSX2 emu
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
- */
-
-#pragma once
-
-#include "IPU/IPU.h"
-
-#include "GS/MultiISA.h"
-
-#include "common/Assertions.h"
-
-// the IPU is fixed to 16 byte strides (128-bit / QWC resolution):
-static const uint decoder_stride = 16;
-
-enum macroblock_modes
-{
-	MACROBLOCK_INTRA = 1,
-	MACROBLOCK_PATTERN = 2,
-	MACROBLOCK_MOTION_BACKWARD = 4,
-	MACROBLOCK_MOTION_FORWARD = 8,
-	MACROBLOCK_QUANT = 16,
-	DCT_TYPE_INTERLACED = 32
-};
-
-enum motion_type
-{
-	MOTION_TYPE_SHIFT = 6,
-	MOTION_TYPE_MASK = (3*64),
-	MOTION_TYPE_BASE = 64,
-	MC_FIELD = (1*64),
-	MC_FRAME = (2*64),
-	MC_16X8 = (2*64),
-	MC_DMV = (3*64)
-};
-
-/* picture structure */
-enum picture_structure
-{
-	TOP_FIELD = 1,
-	BOTTOM_FIELD = 2,
-	FRAME_PICTURE = 3
-};
-
-/* picture coding type */
-enum picture_coding_type
-{
-	I_TYPE = 1,
-	P_TYPE = 2,
-	B_TYPE = 3,
-	D_TYPE = 4
-};
-
-struct macroblock_8{
-	u8 Y[16][16];		//0
-	u8 Cb[8][8];		//1
-	u8 Cr[8][8];		//2
-};
-
-struct macroblock_16{
-	s16 Y[16][16];			//0
-	s16 Cb[8][8];			//1
-	s16 Cr[8][8];			//2
-};
-
-struct macroblock_rgb32{
-	struct {
-		u8 r, g, b, a;
-	} c[16][16];
-};
-
-struct rgb16_t{
-	u16 r:5, g:5, b:5, a:1;
-};
-
-struct macroblock_rgb16{
-	rgb16_t	c[16][16];
-};
-
-struct decoder_t {
-	/* first, state that carries information from one macroblock to the */
-	/* next inside a slice, and is never used outside of mpeg2_slice() */
-
-	/* DCT coefficients - should be kept aligned ! */
-	s16 DCTblock[64];
-
-	u8 niq[64];			//non-intraquant matrix (sequence header)
-	u8 iq[64];			//intraquant matrix (sequence header)
-
-	macroblock_8 mb8;
-	macroblock_16 mb16;
-	macroblock_rgb32 rgb32;
-	macroblock_rgb16 rgb16;
-
-	uint ipu0_data;		// amount of data in the output macroblock (in QWC)
-	uint ipu0_idx;
-
-	int quantizer_scale;
-
-	/* now non-slice-specific information */
-
-	/* picture header stuff */
-
-	/* what type of picture this is (I, P, B, D) */
-	int coding_type;
-
-	/* picture coding extension stuff */
-
-	/* predictor for DC coefficients in intra blocks */
-	s16 dc_dct_pred[3];
-
-	/* quantization factor for intra dc coefficients */
-	int intra_dc_precision;
-	/* top/bottom/both fields */
-	int picture_structure;
-	/* bool to indicate all predictions are frame based */
-	int frame_pred_frame_dct;
-	/* bool to indicate whether intra blocks have motion vectors */
-	/* (for concealment) */
-	int concealment_motion_vectors;
-	/* bit to indicate which quantization table to use */
-	int q_scale_type;
-	/* bool to use different vlc tables */
-	int intra_vlc_format;
-	/* used for DMV MC */
-	int top_field_first;
-	// Pseudo Sign Offset
-	int sgn;
-	// Dither Enable
-	int dte;
-	// Output Format
-	int ofm;
-	// Macroblock type
-	int macroblock_modes;
-	// DC Reset
-	int dcr;
-	// Coded block pattern
-	int coded_block_pattern;
-
-	/* stuff derived from bitstream */
-
-	/* the zigzag scan we're supposed to be using, true for alt, false for normal */
-	bool scantype;
-
-	int mpeg1;
-
-	template< typename T >
-	void SetOutputTo( T& obj )
-	{
-		uint mb_offset = ((uptr)&obj - (uptr)&mb8);
-		pxAssume( (mb_offset & 15) == 0 );
-		ipu0_idx	= mb_offset / 16;
-		ipu0_data	= sizeof(obj)/16;
-	}
-
-	u128* GetIpuDataPtr()
-	{
-		return ((u128*)&mb8) + ipu0_idx;
-	}
-
-	void AdvanceIpuDataBy(uint amt)
-	{
-		pxAssertMsg(ipu0_data>=amt, "IPU FIFO Overflow on advance!" );
-		ipu0_idx  += amt;
-		ipu0_data -= amt;
-	}
-};
-
-struct mpeg2_scan_pack
-{
-	u8 norm[64];
-	u8 alt[64];
-};
-
-extern u32 UBITS(uint bits);
-extern s32 SBITS(uint bits);
-
-MULTI_ISA_DEF(
-	extern int bitstream_init();
-
-	extern void mpeg2_idct_copy(s16 * block, u8* dest, int stride);
-	extern void mpeg2_idct_add(int last, s16 * block, s16* dest, int stride);
-
-	extern bool mpeg2sliceIDEC();
-	extern bool mpeg2_slice();
-	extern int get_macroblock_address_increment();
-	extern int get_macroblock_modes();
-
-	extern int get_motion_delta(const int f_code);
-	extern int get_dmv();
-
-	extern void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn);
-	extern void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte);
-	extern void ipu_vq(macroblock_rgb16& rgb16, u8* indx4);
-
-	extern int slice (u8 * buffer);
-)
-
-#ifdef _MSC_VER
-#define BigEndian(in) _byteswap_ulong(in)
-#else
-#define BigEndian(in) __builtin_bswap32(in) // or we could use the asm function bswap...
-#endif
-
-#ifdef _MSC_VER
-#define BigEndian64(in) _byteswap_uint64(in)
-#else
-#define BigEndian64(in) __builtin_bswap64(in) // or we could use the asm function bswap...
-#endif
-
-alignas(16) extern const mpeg2_scan_pack mpeg2_scan;
-extern const int non_linear_quantizer_scale[];
-
-// The IPU can only do one task at once and never uses other buffers so all mpeg state variables
-// are made available to mpeg/vlc modules as globals here:
-
-alignas(16) extern tIPU_BP g_BP;
-alignas(16) extern decoder_t decoder;
-
diff --git a/pcsx2/IPU/mpeg2lib/Vlc.h b/pcsx2/IPU/mpeg2lib/Vlc.h
deleted file mode 100644
index 61bcd791b5..0000000000
--- a/pcsx2/IPU/mpeg2lib/Vlc.h
+++ /dev/null
@@ -1,663 +0,0 @@
-/*
- * vlc.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- * Modified by Florin for PCSX2 emu
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
- */
-
-// WARNING!  This file should only be included into Mpeg.cpp AND NOWHERE ELSE.
-// All contents of this file are used only by Mpeg.cpp, and including it elsewhere will
-// just result in the linker having to remove a whole lot of redundant/unused decoder
-// tables and static functions. -- air
-
-#pragma once
-
-static __fi int GETWORD()
-{
-	return g_BP.FillBuffer(16);
-}
-
-// Removes bits from the bitstream.  This is done independently of UBITS/SBITS because a
-// lot of mpeg streams have to read ahead and rewind bits and re-read them at different
-// bit depths or sign'age.
-static __fi void DUMPBITS(uint num)
-{
-	g_BP.Advance(num);
-	//pxAssume(g_BP.FP != 0);
-}
-
-static __fi u32 GETBITS(uint num)
-{
-	uint retVal = UBITS(num);
-	g_BP.Advance(num);
-
-	return retVal;
-}
-
-struct MBtab {
-    u8 modes;
-    u8 len;
-};
-
-struct MVtab {
-    u8 delta;
-    u8 len;
-};
-
-struct DMVtab {
-    s8 dmv;
-    u8 len;
-};
-
-struct CBPtab {
-    u8 cbp;
-    u8 len;
-};
-
-struct DCtab {
-    u8 size;
-    u8 len;
-};
-
-struct DCTtab {
-    u8 run;
-    u8 level;
-    u8 len;
-};
-
-struct MBAtab {
-    u8 mba;
-    u8 len;
-};
-
-
-#define INTRA MACROBLOCK_INTRA
-#define QUANT MACROBLOCK_QUANT
-
-static const MBtab MB_I [] = {
-    {INTRA|QUANT, 2}, {INTRA, 1}
-};
-
-#define MC MACROBLOCK_MOTION_FORWARD
-#define CODED MACROBLOCK_PATTERN
-
-alignas(16) static const MBtab MB_P [] = {
-    {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
-    {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
-    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-    {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-    {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
-};
-
-#define FWD MACROBLOCK_MOTION_FORWARD
-#define BWD MACROBLOCK_MOTION_BACKWARD
-#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
-
-alignas(16) static const MBtab MB_B [] = {
-    {0,                 0}, {INTRA|QUANT,       6},
-    {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
-    {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
-					{INTRA,       5}, {INTRA,       5},
-    {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
-    {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
-    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-    {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-    {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
-};
-
-#undef INTRA
-#undef QUANT
-#undef MC
-#undef CODED
-#undef FWD
-#undef BWD
-#undef INTER
-
-
-static const MVtab MV_4 [] = {
-    { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
-};
-
-alignas(16) static const MVtab MV_10 [] = {
-    { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
-    { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
-    {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
-    { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
-    { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
-    { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
-};
-
-
-static const DMVtab DMV_2 [] = {
-    { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
-};
-
-
-alignas(16) static const CBPtab CBP_7 [] = {
-    {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
-    {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
-    {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
-    {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
-    {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
-    {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
-    {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
-    {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
-    {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
-    {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
-    {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
-    {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
-    {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
-    {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
-    {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
-    {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
-    {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
-    {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
-    {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
-    {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
-    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-    {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
-};
-
-alignas(16) static const CBPtab CBP_9 [] = {
-    {0,    0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
-    {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
-    {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
-    {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
-    {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
-    {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
-    {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
-    {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
-    {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
-    {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
-    {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
-    {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
-    {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
-    {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
-    {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
-    {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
-};
-
-#if 0		// following tables are unused by PCSX2
-
-static const DCtab DC_lum_5 [] = {
-    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-    {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
-    {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
-};
-
-static const DCtab DC_chrom_5 [] = {
-    {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
-    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-    {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
-};
-
-static const DCtab DC_long [] = {
-    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-    {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
-    {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
-};
-
-static const DCTtab DCT_16 [] = {
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-    {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
-    {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
-    { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
-    { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
-};
-
-static const DCTtab DCT_15 [] = {
-    {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
-    {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
-    {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
-    {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
-    {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
-    {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
-    {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
-    {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
-    {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
-    {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
-    {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
-    {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
-};
-
-static const DCTtab DCT_13 [] = {
-    { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
-    {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
-    {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
-    { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
-    {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
-    {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
-    {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
-    { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
-    {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
-    { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
-    {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
-    {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
-};
-
-static const DCTtab DCT_B14_10 [] = {
-    { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
-    {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
-};
-
-static const DCTtab DCT_B14_8 [] = {
-    { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
-    {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
-    {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
-    {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
-    {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
-    {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
-    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-    { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
-    {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
-};
-
-static const DCTtab DCT_B14AC_5 [] = {
-		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
-};
-
-static const DCTtab DCT_B14DC_5 [] = {
-		 {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
-};
-
-static const DCTtab DCT_B15_10 [] = {
-    {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
-    {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
-};
-
-static const DCTtab DCT_B15_8 [] = {
-    { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
-    {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
-    {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
-    {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
-    {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
-    {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
-    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-    {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
-    { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
-    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-    { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
-    { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
-    {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
-    {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
-};
-#endif
-
-struct MBAtabSet
-{
-	MBAtab mba5[30];
-	MBAtab mba11[26*4];
-};
-alignas(16) static const MBAtabSet MBA = {
-	{	// mba5
-				{6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
-		{2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
-		{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
-		{0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
-	},
-
-	{	// mba11
-		{32, 11}, {31, 11}, {30, 11}, {29, 11},
-		{28, 11}, {27, 11}, {26, 11}, {25, 11},
-		{24, 11}, {23, 11}, {22, 11}, {21, 11},
-		{20, 10}, {20, 10}, {19, 10}, {19, 10},
-		{18, 10}, {18, 10}, {17, 10}, {17, 10},
-		{16, 10}, {16, 10}, {15, 10}, {15, 10},
-		{14,  8}, {14,  8}, {14,  8}, {14,  8},
-		{14,  8}, {14,  8}, {14,  8}, {14,  8},
-		{13,  8}, {13,  8}, {13,  8}, {13,  8},
-		{13,  8}, {13,  8}, {13,  8}, {13,  8},
-		{12,  8}, {12,  8}, {12,  8}, {12,  8},
-		{12,  8}, {12,  8}, {12,  8}, {12,  8},
-		{11,  8}, {11,  8}, {11,  8}, {11,  8},
-		{11,  8}, {11,  8}, {11,  8}, {11,  8},
-		{10,  8}, {10,  8}, {10,  8}, {10,  8},
-		{10,  8}, {10,  8}, {10,  8}, {10,  8},
-		{ 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-		{ 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-		{ 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-		{ 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-		{ 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-		{ 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-		{ 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-		{ 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-		{ 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-		{ 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
-	}
-};
-
-// New
-
-#if 0		// Not used by PCSX2
-/* Table B-1, macroblock_address_increment, codes 00010 ... 011xx */
-static MBAtab MBAtab1[16] =
-{ {0,0}, {0,0}, {7,5}, {6,5}, {5,4}, {5,4}, {4,4}, {4,4},
-  {3,3}, {3,3}, {3,3}, {3,3}, {2,3}, {2,3}, {2,3}, {2,3}
-};
-
-/* Table B-1, macroblock_address_increment, codes 00000011000 ... 0000111xxxx */
-static MBAtab MBAtab2[104] =
-{
-  {33,11}, {32,11}, {31,11}, {30,11}, {29,11}, {28,11}, {27,11}, {26,11},
-  {25,11}, {24,11}, {23,11}, {22,11}, {21,10}, {21,10}, {20,10}, {20,10},
-  {19,10}, {19,10}, {18,10}, {18,10}, {17,10}, {17,10}, {16,10}, {16,10},
-  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},  {15,8},
-  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},  {14,8},
-  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},  {13,8},
-  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},  {12,8},
-  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},  {11,8},
-  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},  {10,8},
-  {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},
-  {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},   {9,7},
-  {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},
-  {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7},   {8,7}
-};
-#endif
-
-struct DCtabSet
-{
-	DCtab lum0[32];		// Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110
-	DCtab lum1[16];		// Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111
-	DCtab chrom0[32];	// Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110
-	DCtab chrom1[32];	// Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111
-};
-
-alignas(16) static const DCtabSet DCtable =
-{
-	// lum0: Table B-12, dct_dc_size_luminance, codes 00xxx ... 11110 */
-	{ {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-	  {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-	  {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
-	  {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}, {0, 0} },
-
-	/* lum1: Table B-12, dct_dc_size_luminance, codes 111110xxx ... 111111111 */
-	{ {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6},
-	  {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10,9}, {11,9} },
-
-	/* chrom0: Table B-13, dct_dc_size_chrominance, codes 00xxx ... 11110 */
-	{ {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
-	  {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-	  {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-	  {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}, {0, 0} },
-
-	/* chrom1: Table B-13, dct_dc_size_chrominance, codes 111110xxxx ... 1111111111 */
-	{ {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
-	  {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6}, {6, 6},
-	  {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7}, {7, 7},
-	  {8, 8}, {8, 8}, {8, 8}, {8, 8}, {9, 9}, {9, 9}, {10,10}, {11,10} },
-};
-
-struct DCTtabSet
-{
-	DCTtab first[12];
-	DCTtab next[12];
-
-	DCTtab tab0[60];
-	DCTtab tab0a[252];
-	DCTtab tab1[8];
-	DCTtab tab1a[8];
-
-	DCTtab tab2[16];
-	DCTtab tab3[16];
-	DCTtab tab4[16];
-	DCTtab tab5[16];
-	DCTtab tab6[16];
-};
-
-alignas(16) static const DCTtabSet DCT =
-{
-	/* first[12]: Table B-14, DCT coefficients table zero,
-	 * codes 0100 ... 1xxx (used for first (DC) coefficient)
-	 */
-	{ {0,2,4}, {2,1,4}, {1,1,3}, {1,1,3},
-	  {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1},
-	  {0,1,1}, {0,1,1}, {0,1,1}, {0,1,1} },
-
-	/* next[12]: Table B-14, DCT coefficients table zero,
-	 * codes 0100 ... 1xxx (used for all other coefficients)
-	 */
-	{ {0,2,4},  {2,1,4},  {1,1,3},  {1,1,3},
-	  {64,0,2}, {64,0,2}, {64,0,2}, {64,0,2}, /* EOB */
-	  {0,1,2},  {0,1,2},  {0,1,2},  {0,1,2} },
-
-	/* tab0[60]: Table B-14, DCT coefficients table zero,
-	 * codes 000001xx ... 00111xxx
-	 */
-	{ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
-	  {2,2,7}, {2,2,7}, {9,1,7}, {9,1,7},
-	  {0,4,7}, {0,4,7}, {8,1,7}, {8,1,7},
-	  {7,1,6}, {7,1,6}, {7,1,6}, {7,1,6},
-	  {6,1,6}, {6,1,6}, {6,1,6}, {6,1,6},
-	  {1,2,6}, {1,2,6}, {1,2,6}, {1,2,6},
-	  {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
-	  {13,1,8}, {0,6,8}, {12,1,8}, {11,1,8},
-	  {3,2,8}, {1,3,8}, {0,5,8}, {10,1,8},
-	  {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
-	  {0,3,5}, {0,3,5}, {0,3,5}, {0,3,5},
-	  {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
-	  {4,1,5}, {4,1,5}, {4,1,5}, {4,1,5},
-	  {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
-	  {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5} },
-
-	/* tab0a[252]: Table B-15, DCT coefficients table one,
-	 * codes 000001xx ... 11111111
-	 */
-	{ {65,0,6}, {65,0,6}, {65,0,6}, {65,0,6}, /* Escape */
-	  {7,1,7}, {7,1,7}, {8,1,7}, {8,1,7},
-	  {6,1,7}, {6,1,7}, {2,2,7}, {2,2,7},
-	  {0,7,6}, {0,7,6}, {0,7,6}, {0,7,6},
-	  {0,6,6}, {0,6,6}, {0,6,6}, {0,6,6},
-	  {4,1,6}, {4,1,6}, {4,1,6}, {4,1,6},
-	  {5,1,6}, {5,1,6}, {5,1,6}, {5,1,6},
-	  {1,5,8}, {11,1,8}, {0,11,8}, {0,10,8},
-	  {13,1,8}, {12,1,8}, {3,2,8}, {1,4,8},
-	  {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
-	  {2,1,5}, {2,1,5}, {2,1,5}, {2,1,5},
-	  {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
-	  {1,2,5}, {1,2,5}, {1,2,5}, {1,2,5},
-	  {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
-	  {3,1,5}, {3,1,5}, {3,1,5}, {3,1,5},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {1,1,3}, {1,1,3}, {1,1,3}, {1,1,3},
-	  {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4}, /* EOB */
-	  {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
-	  {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
-	  {64,0,4}, {64,0,4}, {64,0,4}, {64,0,4},
-	  {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
-	  {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
-	  {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
-	  {0,3,4}, {0,3,4}, {0,3,4}, {0,3,4},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,1,2}, {0,1,2}, {0,1,2}, {0,1,2},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,2,3}, {0,2,3}, {0,2,3}, {0,2,3},
-	  {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
-	  {0,4,5}, {0,4,5}, {0,4,5}, {0,4,5},
-	  {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
-	  {0,5,5}, {0,5,5}, {0,5,5}, {0,5,5},
-	  {9,1,7}, {9,1,7}, {1,3,7}, {1,3,7},
-	  {10,1,7}, {10,1,7}, {0,8,7}, {0,8,7},
-	  {0,9,7}, {0,9,7}, {0,12,8}, {0,13,8},
-	  {2,3,8}, {4,2,8}, {0,14,8}, {0,15,8} },
-
-	/* Table B-14, DCT coefficients table zero,
-	 * codes 0000001000 ... 0000001111
-	 */
-	{ {16,1,10}, {5,2,10}, {0,7,10}, {2,3,10},
-	  {1,4,10}, {15,1,10}, {14,1,10}, {4,2,10} },
-
-	/* Table B-15, DCT coefficients table one,
-	 * codes 000000100x ... 000000111x
-	 */
-	{ {5,2,9}, {5,2,9}, {14,1,9}, {14,1,9},
-	  {2,4,10}, {16,1,10}, {15,1,9}, {15,1,9} },
-
-	/* Table B-14/15, DCT coefficients table zero / one,
-	 * codes 000000010000 ... 000000011111
-	 */
-	{ {0,11,12}, {8,2,12}, {4,3,12}, {0,10,12},
-	  {2,4,12}, {7,2,12}, {21,1,12}, {20,1,12},
-	  {0,9,12}, {19,1,12}, {18,1,12}, {1,5,12},
-	  {3,3,12}, {0,8,12}, {6,2,12}, {17,1,12} },
-
-	/* Table B-14/15, DCT coefficients table zero / one,
-	 * codes 0000000010000 ... 0000000011111
-	 */
-	{ {10,2,13}, {9,2,13}, {5,3,13}, {3,4,13},
-	  {2,5,13}, {1,7,13}, {1,6,13}, {0,15,13},
-	  {0,14,13}, {0,13,13}, {0,12,13}, {26,1,13},
-	  {25,1,13}, {24,1,13}, {23,1,13}, {22,1,13} },
-
-	/* Table B-14/15, DCT coefficients table zero / one,
-	 * codes 00000000010000 ... 00000000011111
-	 */
-	{ {0,31,14}, {0,30,14}, {0,29,14}, {0,28,14},
-	  {0,27,14}, {0,26,14}, {0,25,14}, {0,24,14},
-	  {0,23,14}, {0,22,14}, {0,21,14}, {0,20,14},
-	  {0,19,14}, {0,18,14}, {0,17,14}, {0,16,14} },
-
-	/* Table B-14/15, DCT coefficients table zero / one,
-	 * codes 000000000010000 ... 000000000011111
-	 */
-	{ {0,40,15}, {0,39,15}, {0,38,15}, {0,37,15},
-	  {0,36,15}, {0,35,15}, {0,34,15}, {0,33,15},
-	  {0,32,15}, {1,14,15}, {1,13,15}, {1,12,15},
-	  {1,11,15}, {1,10,15}, {1,9,15}, {1,8,15} },
-
-	/* Table B-14/15, DCT coefficients table zero / one,
-	 * codes 0000000000010000 ... 0000000000011111
-	 */
-	{ {1,18,16}, {1,17,16}, {1,16,16}, {1,15,16},
-	  {6,3,16}, {16,2,16}, {15,2,16}, {14,2,16},
-	  {13,2,16}, {12,2,16}, {11,2,16}, {31,1,16},
-	  {30,1,16}, {29,1,16}, {28,1,16}, {27,1,16} }
-
-};
diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp
index 51db3182e5..c0ef19b3d2 100644
--- a/pcsx2/IPU/yuv2rgb.cpp
+++ b/pcsx2/IPU/yuv2rgb.cpp
@@ -20,9 +20,9 @@
 #include "PrecompiledHeader.h"
 
 #include "Common.h"
-#include "IPU.h"
-#include "yuv2rgb.h"
-#include "mpeg2lib/Mpeg.h"
+#include "IPU/IPU.h"
+#include "IPU/IPU_MultiISA.h"
+#include "IPU/yuv2rgb.h"
 
 // The IPU's colour space conversion conforms to ITU-R Recommendation BT.601 if anyone wants to make a
 // faster or "more accurate" implementation, but this is the precise documented integer method used by
diff --git a/pcsx2/pcsx2core.vcxproj b/pcsx2/pcsx2core.vcxproj
index 46018909b2..ed26318a7d 100644
--- a/pcsx2/pcsx2core.vcxproj
+++ b/pcsx2/pcsx2core.vcxproj
@@ -450,8 +450,6 @@
     <ClCompile Include="Ipu\IPU_Fifo.cpp" />
     <ClCompile Include="Ipu\IPU_MultiISA.cpp" />
     <ClCompile Include="Ipu\yuv2rgb.cpp" />
-    <ClCompile Include="Ipu\mpeg2lib\Idct.cpp" />
-    <ClCompile Include="Ipu\mpeg2lib\Mpeg.cpp" />
     <ClCompile Include="GS.cpp" />
     <ClCompile Include="MTGS.cpp" />
     <ClCompile Include="DebugTools\DisR3000A.cpp" />
@@ -579,6 +577,7 @@
     <ClInclude Include="HostDisplay.h" />
     <ClInclude Include="HostSettings.h" />
     <ClInclude Include="IopGte.h" />
+    <ClInclude Include="IPU\mpeg2_vlc.h" />
     <ClInclude Include="MemoryCardProtocol.h" />
     <ClInclude Include="MultitapProtocol.h" />
     <ClInclude Include="PAD\Host\Global.h" />
@@ -782,8 +781,6 @@
     <ClInclude Include="Ipu\IPU_Fifo.h" />
     <ClInclude Include="Ipu\IPU_MultiISA.h" />
     <ClInclude Include="Ipu\yuv2rgb.h" />
-    <ClInclude Include="Ipu\mpeg2lib\Mpeg.h" />
-    <ClInclude Include="Ipu\mpeg2lib\Vlc.h" />
     <ClInclude Include="GS.h" />
     <ClInclude Include="DebugTools\Debug.h" />
     <ClInclude Include="DebugTools\DisASM.h" />
diff --git a/pcsx2/pcsx2core.vcxproj.filters b/pcsx2/pcsx2core.vcxproj.filters
index a29bd0c053..4bea3e0524 100644
--- a/pcsx2/pcsx2core.vcxproj.filters
+++ b/pcsx2/pcsx2core.vcxproj.filters
@@ -94,9 +94,6 @@
     <Filter Include="System\Ps2\IPU">
       <UniqueIdentifier>{4dab2d06-69e0-4f3e-b6d3-45e5e85af940}</UniqueIdentifier>
     </Filter>
-    <Filter Include="System\Ps2\IPU\mpeg2lib">
-      <UniqueIdentifier>{67e51016-d1db-44d2-910d-349d2833f798}</UniqueIdentifier>
-    </Filter>
     <Filter Include="System\Ps2\GS">
       <UniqueIdentifier>{5602cc18-9d1c-49c8-9509-7e4cf9ecd91b}</UniqueIdentifier>
     </Filter>
@@ -653,12 +650,6 @@
     <ClCompile Include="IPU\yuv2rgb.cpp">
       <Filter>System\Ps2\IPU</Filter>
     </ClCompile>
-    <ClCompile Include="IPU\mpeg2lib\Idct.cpp">
-      <Filter>System\Ps2\IPU\mpeg2lib</Filter>
-    </ClCompile>
-    <ClCompile Include="IPU\mpeg2lib\Mpeg.cpp">
-      <Filter>System\Ps2\IPU\mpeg2lib</Filter>
-    </ClCompile>
     <ClCompile Include="GS.cpp">
       <Filter>System\Ps2\GS\GIF</Filter>
     </ClCompile>
@@ -1619,12 +1610,6 @@
     <ClInclude Include="IPU\yuv2rgb.h">
       <Filter>System\Ps2\IPU</Filter>
     </ClInclude>
-    <ClInclude Include="IPU\mpeg2lib\Mpeg.h">
-      <Filter>System\Ps2\IPU\mpeg2lib</Filter>
-    </ClInclude>
-    <ClInclude Include="IPU\mpeg2lib\Vlc.h">
-      <Filter>System\Ps2\IPU\mpeg2lib</Filter>
-    </ClInclude>
     <ClInclude Include="DebugTools\Debug.h">
       <Filter>System\Ps2\Debug</Filter>
     </ClInclude>
@@ -2345,6 +2330,9 @@
     <ClInclude Include="GS\Renderers\HW\GSHwHack.h">
       <Filter>System\Ps2\GS\Renderers\Hardware</Filter>
     </ClInclude>
+    <ClInclude Include="IPU\mpeg2_vlc.h">
+      <Filter>System\Ps2\IPU</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <CustomBuildStep Include="rdebug\deci2.h">