/******************************************************************************/ /* Mednafen NEC PC-FX Emulation Module */ /******************************************************************************/ /* rainbow.cpp: ** Copyright (C) 2006-2016 Mednafen Team ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software Foundation, Inc., ** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* ** MJPEG-like decoder based on the algorithm and Huffman data tables provided by David Michel ** of MagicEngine and MagicEngine-FX. */ #include "pcfx.h" #include "rainbow.h" #include "king.h" #include "interrupt.h" #include "jrevdct.h" namespace MDFN_IEN_PCFX { static bool ChromaIP; // Bilinearly interpolate chroma channel /* Y = luminance/luma, UV = chrominance/chroma */ struct HuffmanQuickLUTPair { uint8 val; uint8 bitc; // Bit count for the code. }; static const uint8 zigzag[63] = { 0x01, 0x08, 0x10, 0x09, 0x02, 0x03, 0x0A, 0x11, 0x18, 0x20, 0x19, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, 0x1A, 0x21, 0x28, 0x30, 0x29, 0x22, 0x1B, 0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1C, 0x23, 0x2A, 0x31, 0x38, 0x39, 0x32, 0x2B, 0x24, 0x1D, 0x16, 0x0F, 0x17, 0x1E, 0x25, 0x2C, 0x33, 0x3A, 0x3B, 0x34, 0x2D, 0x26, 0x1F, 0x27, 0x2E, 0x35, 0x3C, 0x3D, 0x36, 0x2F, 0x37, 0x3E, 0x3F}; static const HuffmanQuickLUTPair ac_y_qlut[1 << 12] = { #include "rainbow_acy.inc" }; static const HuffmanQuickLUTPair ac_uv_qlut[1 << 12] = { #include "rainbow_acuv.inc" }; static const HuffmanQuickLUTPair dc_y_qlut[1 << 9] = { #include "rainbow_dcy.inc" }; static const HuffmanQuickLUTPair dc_uv_qlut[1 << 8] = { #include "rainbow_dcuv.inc" }; static uint8 *DecodeBuffer[2] = {NULL, NULL}; static int32 DecodeFormat[2]; // The format each buffer is in(-1 = invalid, 0 = palettized 8-bit, 1 = YUV) static uint32 QuantTables[2][64], QuantTablesBase[2][64]; // 0 = Y, 1 = UV static uint32 DecodeBufferWhichRead; static int32 RasterReadPos; static uint16 Control; static uint16 NullRunY, NullRunU, NullRunV, HSync; static uint16 HScroll; static uint32 bits_buffer; static uint32 bits_buffered_bits; static int32 bits_bytes_left; static void InitBits(int32 bcount) { bits_bytes_left = bcount; bits_buffer = 0; bits_buffered_bits = 0; } static INLINE uint8 FetchWidgywabbit(void) { if (bits_bytes_left <= 0) return (0); uint8 ret = KING_RB_Fetch(); if (ret == 0xFF) KING_RB_Fetch(); bits_bytes_left--; return (ret); } enum { MDFNBITS_PEEK = 1, MDFNBITS_FUNNYSIGN = 2, }; static INLINE uint32 GetBits(const unsigned int count, const unsigned int how = 0) { uint32 ret; while (bits_buffered_bits < count) { bits_buffer <<= 8; bits_buffer |= FetchWidgywabbit(); bits_buffered_bits += 8; } ret = (bits_buffer >> (bits_buffered_bits - count)) & ((1 << count) - 1); if (!(how & MDFNBITS_PEEK)) bits_buffered_bits -= count; if ((how & MDFNBITS_FUNNYSIGN) && count) { if (ret < (1U << (count - 1))) ret += 1 - (1 << count); } return (ret); } // Note: SkipBits is intended to be called right after GetBits() with "how" MDFNBITS_PEEK, // and the count pass to SkipBits must be less than or equal to the count passed to GetBits(). static INLINE void SkipBits(const unsigned int count) { bits_buffered_bits -= count; } static uint32 HappyColor; // Cached, calculated from null run yuv registers; static void CalcHappyColor(void) { uint8 y_c = (int16)NullRunY + 0x80; uint8 u_c = (int16)NullRunU + 0x80; uint8 v_c = (int16)NullRunV + 0x80; HappyColor = (y_c << 16) | (u_c << 8) | (v_c << 0); } static uint32 get_ac_coeff(const HuffmanQuickLUTPair *table, int32 *zeroes) { unsigned int numbits; uint32 rawbits; uint32 code; rawbits = GetBits(12, MDFNBITS_PEEK); code = table[rawbits].val; SkipBits(table[rawbits].bitc); numbits = code & 0xF; *zeroes = code >> 4; return (GetBits(numbits, MDFNBITS_FUNNYSIGN)); } static uint32 get_dc_coeff(const HuffmanQuickLUTPair *table, int32 *zeroes, int maxbits) { uint32 code; for (;;) { uint32 rawbits; rawbits = GetBits(maxbits, MDFNBITS_PEEK); code = table[rawbits].val; SkipBits(table[rawbits].bitc); if (code < 0xF) { *zeroes = 0; return (GetBits(code, MDFNBITS_FUNNYSIGN)); } else if (code == 0xF) { get_ac_coeff(ac_y_qlut, zeroes); (*zeroes)++; return (0); } else if (code >= 0x10) { code -= 0x10; for (int i = 0; i < 64; i++) { // Y uint32 coeff = (QuantTablesBase[0][i] * code) >> 2; if (coeff < 1) coeff = 1; else if (coeff > 0xFE) coeff = 0xFE; QuantTables[0][i] = coeff; // UV if (i) coeff = (QuantTablesBase[1][i] * code) >> 2; else coeff = (QuantTablesBase[1][i]) >> 2; if (coeff < 1) coeff = 1; else if (coeff > 0xFE) coeff = 0xFE; QuantTables[1][i] = coeff; } } } } static INLINE uint32 get_dc_y_coeff(int32 *zeroes) { return (get_dc_coeff(dc_y_qlut, zeroes, 9)); } static uint32 get_dc_uv_coeff(void) { const HuffmanQuickLUTPair *table = dc_uv_qlut; uint32 code; uint32 rawbits = GetBits(8, MDFNBITS_PEEK); code = table[rawbits].val; SkipBits(table[rawbits].bitc); return (GetBits(code, MDFNBITS_FUNNYSIGN)); } static void decode(int32 *dct, const uint32 *QuantTable, const int32 dc, const HuffmanQuickLUTPair *table) { int32 coeff; int zeroes; int count; int index; dct[0] = (int16)(QuantTable[0] * dc); count = 0; do { coeff = get_ac_coeff(table, &zeroes); if (!coeff) { if (!zeroes) { while (count < 63) { dct[zigzag[count++]] = 0; } break; } else if (zeroes == 1) zeroes = 0xF; } while (zeroes-- && count < 63) { dct[zigzag[count++]] = 0; } zeroes = 0; if (count < 63) { index = zigzag[count++]; dct[index] = (int16)(QuantTable[index] * coeff); } } while (count < 63); } #ifdef WANT_DEBUGGER uint32 RAINBOW_GetRegister(const unsigned int id, char *special, const uint32 special_len) { uint32 value = 0xDEADBEEF; if (id == RAINBOW_GSREG_RSCRLL) value = HScroll; else if (id == RAINBOW_GSREG_RCTRL) value = Control; else if (id == RAINBOW_GSREG_RNRY) value = NullRunY; else if (id == RAINBOW_GSREG_RNRU) value = NullRunU; else if (id == RAINBOW_GSREG_RNRV) value = NullRunV; else if (id == RAINBOW_GSREG_RHSYNC) value = HSync; return (value); } void RAINBOW_SetRegister(const unsigned int id, uint32 value) { if (id == RAINBOW_GSREG_RSCRLL) HScroll = value & 0x1FF; else if (id == RAINBOW_GSREG_RCTRL) Control = value; else if (id == RAINBOW_GSREG_RNRY) NullRunY = value; else if (id == RAINBOW_GSREG_RNRU) NullRunU = value; else if (id == RAINBOW_GSREG_RNRV) NullRunV = value; } #endif static uint32 LastLine[256]; static bool FirstDecode; static bool GarbageData; void RAINBOW_Init(bool arg_ChromaIP) { ChromaIP = arg_ChromaIP; for (int i = 0; i < 2; i++) { DecodeBuffer[i] = new uint8[0x2000 * 4]; memset(DecodeBuffer[i], 0, 0x2000 * 4); } DecodeFormat[0] = DecodeFormat[1] = -1; DecodeBufferWhichRead = 0; GarbageData = FALSE; FirstDecode = TRUE; RasterReadPos = 0; } // RAINBOW base I/O port address: 0x200 #define REGSETBOFW(_reg, _data, _wb) \ { \ (_reg) &= ~(0xFF << ((_wb)*8)); \ (_reg) |= (_data) << ((_wb)*8); \ } #define REGSETBOFH REGSETBOFW // The horizontal scroll register is set up kind of weird...D0-D7 of writes to $200 set the lower byte, D0-D7 of writes to $202 set the upper byte void RAINBOW_Write8(uint32 A, uint8 V) { //printf("RAINBOW Wr8: %08x %02x\n", A, V); switch (A & 0x1C) { case 0x00: REGSETBOFH(HScroll, V, (A & 0x2) >> 1); HScroll &= 0x1FF; break; case 0x04: REGSETBOFH(Control, V, A & 0x3); break; case 0x08: REGSETBOFH(NullRunY, V, A & 0x3); CalcHappyColor(); break; case 0x0C: REGSETBOFH(NullRunU, V, A & 0x3); CalcHappyColor(); break; case 0x10: REGSETBOFH(NullRunV, V, A & 0x3); CalcHappyColor(); break; case 0x14: REGSETBOFH(HSync, V, A & 0x3); break; } } void RAINBOW_Write16(uint32 A, uint16 V) { int msh = A & 0x2; //printf("RAINBOW Wr16: %08x %04x\n", A, V); switch (A & 0x1C) { case 0x00: REGSETBOFH(HScroll, V & 0xFF, (A & 0x2) >> 1); HScroll &= 0x1FF; break; case 0x04: REGSETHW(Control, V, msh); break; case 0x08: REGSETHW(NullRunY, V, msh); CalcHappyColor(); break; case 0x0C: REGSETHW(NullRunU, V, msh); CalcHappyColor(); break; case 0x10: REGSETHW(NullRunV, V, msh); CalcHappyColor(); break; case 0x14: REGSETHW(HSync, V, msh); break; } } void RAINBOW_ForceTransferReset(void) { RasterReadPos = 0; DecodeFormat[0] = DecodeFormat[1] = -1; } void RAINBOW_SwapBuffers(void) { DecodeBufferWhichRead ^= 1; RasterReadPos = 0; } void RAINBOW_DecodeBlock(bool arg_FirstDecode, bool Skip) { uint8 block_type; int32 block_size; int icount; int which_buffer = DecodeBufferWhichRead ^ 1; if (!(Control & 0x01)) { puts("Rainbow decode when disabled!!"); return; } if (arg_FirstDecode) { FirstDecode = TRUE; GarbageData = FALSE; } if (GarbageData) icount = 0; else icount = 0x200; do { do { while (KING_RB_Fetch() != 0xFF && icount > 0) icount--; block_type = KING_RB_Fetch(); //if(icount > 0 && block_type != 0xF0 && block_type != 0xF1 && block_type != 0xF2 && block_type != 0xF3 && block_type != 0xF8 && block_type != 0xFF) //if(icount > 0 && block_type == 0x11) // printf("%02x\n", block_type); icount--; } while (block_type != 0xF0 && block_type != 0xF1 && block_type != 0xF2 && block_type != 0xF3 && block_type != 0xF8 && block_type != 0xFF && icount > 0); { uint16 tmp; tmp = KING_RB_Fetch() << 8; tmp |= KING_RB_Fetch() << 0; block_size = (int16)tmp; } block_size -= 2; if (block_type == 0xFF && block_size <= 0) for (int i = 0; i < 128; i++, icount--) KING_RB_Fetch(); //fprintf(stderr, "Block: %d\n", block_size); } while (block_size <= 0 && icount > 0); //if(!GarbageData && icount < 500) //{ // FXDBG("Partial garbage data. %d", icount); //} //printf("%d\n", icount); if (icount <= 0) { FXDBG("Garbage data."); GarbageData = TRUE; //printf("Dooom: %d\n"); DecodeFormat[which_buffer] = 0; memset(DecodeBuffer[which_buffer], 0, 0x2000); goto BufferNoDecode; } if (block_type == 0xf8 || block_type == 0xff) DecodeFormat[which_buffer] = 1; else DecodeFormat[which_buffer] = 0; if (block_type == 0xF8 || block_type == 0xFF) { if (block_type == 0xFF) { for (int q = 0; q < 2; q++) for (int i = 0; i < 64; i++) { uint8 meow = KING_RB_Fetch(); QuantTables[q][i] = meow; QuantTablesBase[q][i] = meow; } block_size -= 128; } InitBits(block_size); int32 dc_y = 0, dc_u = 0, dc_v = 0; uint32 *dest_base = (uint32 *)DecodeBuffer[which_buffer]; for (int column = 0; column < 16; column++) { uint32 *dest_base_column = &dest_base[column * 16]; int zeroes = 0; dc_y += get_dc_y_coeff(&zeroes); if (zeroes) // If set, clear the number of columns { do { if (column < 16) { dest_base_column = &dest_base[column * 16]; for (int y = 0; y < 16; y++) for (int x = 0; x < 16; x++) dest_base_column[y * 256 + x] = HappyColor; } column++; zeroes--; } while (zeroes); column--; // Fix for the column autoincrement in the while(zeroes) loop dc_y = dc_u = dc_v = 0; } else { int32 dct_y[256]; int32 dct_u[64]; int32 dct_v[64]; // Y/Luma, 16x16 components // --------- // | A | C | // |-------| // | B | D | // --------- // A (0, 0) decode(&dct_y[0x00], QuantTables[0], dc_y, ac_y_qlut); // B (0, 1) dc_y += get_dc_y_coeff(&zeroes); decode(&dct_y[0x40], QuantTables[0], dc_y, ac_y_qlut); // C (1, 0) dc_y += get_dc_y_coeff(&zeroes); decode(&dct_y[0x80], QuantTables[0], dc_y, ac_y_qlut); // D (1, 1) dc_y += get_dc_y_coeff(&zeroes); decode(&dct_y[0xC0], QuantTables[0], dc_y, ac_y_qlut); // U, 8x8 components dc_u += get_dc_uv_coeff(); decode(&dct_u[0x00], QuantTables[1], dc_u, ac_uv_qlut); // V, 8x8 components dc_v += get_dc_uv_coeff(); decode(&dct_v[0x00], QuantTables[1], dc_v, ac_uv_qlut); if (Skip) continue; j_rev_dct(&dct_y[0x00]); j_rev_dct(&dct_y[0x40]); j_rev_dct(&dct_y[0x80]); j_rev_dct(&dct_y[0xC0]); j_rev_dct(&dct_u[0x00]); j_rev_dct(&dct_v[0x00]); for (int y = 0; y < 16; y++) for (int x = 0; x < 16; x++) dest_base_column[y * 256 + x] = clamp_to_u8(dct_y[y * 8 + (x & 0x7) + ((x & 0x8) << 4)] + 0x80) << 16; if (!ChromaIP) { for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { uint32 component_uv = (clamp_to_u8(dct_u[y * 8 + x] + 0x80) << 8) | clamp_to_u8(dct_v[y * 8 + x] + 0x80); dest_base_column[y * 512 + (256 * 0) + x * 2 + 0] |= component_uv; dest_base_column[y * 512 + (256 * 0) + x * 2 + 1] |= component_uv; dest_base_column[y * 512 + (256 * 1) + x * 2 + 0] |= component_uv; dest_base_column[y * 512 + (256 * 1) + x * 2 + 1] |= component_uv; } } } else { for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { uint32 component_uv = (clamp_to_u8(dct_u[y * 8 + x] + 0x80) << 8) | clamp_to_u8(dct_v[y * 8 + x] + 0x80); dest_base_column[y * 512 + (256 * 1) + x * 2 + 0] |= component_uv; } } } } } // Do bilinear interpolation on the chroma channels: if (!Skip && ChromaIP) { for (int y = 0; y < 16; y += 2) { uint32 *linebase = &dest_base[y * 256]; uint32 *linebase1 = &dest_base[(y + 1) * 256]; for (int x = 0; x < 254; x += 2) { unsigned int u, v; u = (((linebase1[x] >> 8) & 0xFF) + ((linebase1[x + 2] >> 8) & 0xFF)) >> 1; v = (((linebase1[x] >> 0) & 0xFF) + ((linebase1[x + 2] >> 0) & 0xFF)) >> 1; linebase1[x + 1] = (linebase1[x + 1] & ~0xFFFF) | (u << 8) | v; } linebase1[0xFF] = (linebase1[0xFF] & ~0xFFFF) | (linebase1[0xFE] & 0xFFFF); if (FirstDecode) { for (int x = 0; x < 256; x++) linebase[x] = (linebase[x] & ~0xFFFF) | (linebase1[x] & 0xFFFF); FirstDecode = 0; } else for (int x = 0; x < 256; x++) { unsigned int u, v; u = (((LastLine[x] >> 8) & 0xFF) + ((linebase1[x] >> 8) & 0xFF)) >> 1; v = (((LastLine[x] >> 0) & 0xFF) + ((linebase1[x] >> 0) & 0xFF)) >> 1; linebase[x] = (linebase[x] & ~0xFFFF) | (u << 8) | v; } memcpy(LastLine, linebase1, 256 * 4); } } // End chroma interpolation } // end jpeg-like decoding else { // Earlier code confines the set to F0,F1,F2, and F3. // F0 = (4, 0xF), F1 = (3, 0x7), F2 = (0x2, 0x3), F3 = 0x1, 0x1) const unsigned int plt_shift = 4 - (block_type & 0x3); const unsigned int crl_mask = (1 << plt_shift) - 1; int x = 0; while (block_size > 0) { uint8 boot; unsigned int rle_count; boot = KING_RB_Fetch(); block_size--; if (boot == 0xFF) { KING_RB_Fetch(); block_size--; } if (!(boot & crl_mask)) // Expand mode? { rle_count = KING_RB_Fetch(); block_size--; if (rle_count == 0xFF) { KING_RB_Fetch(); block_size--; } rle_count++; } else rle_count = boot & crl_mask; for (unsigned int i = 0; i < rle_count; i++) { if (x >= 0x2000) { //puts("Oops"); break; // Don't overflow our decode buffer! } DecodeBuffer[which_buffer][x] = (boot >> plt_shift); x++; } } } // end RLE decoding //for(int i = 0; i < 8 + block_size; i++) // KING_RB_Fetch(); BufferNoDecode:; } void KING_Moo(void); // NOTE: layer_or and palette_ptr are optimizations, the real RAINBOW chip knows not of such things. int RAINBOW_FetchRaster(uint32 *linebuffer, uint32 layer_or, uint32 *palette_ptr) { int ret; ret = DecodeFormat[DecodeBufferWhichRead]; if (linebuffer) { linebuffer = MDFN_ASSUME_ALIGNED(linebuffer, 8); // if (DecodeFormat[DecodeBufferWhichRead] == -1) // None { MDFN_FastArraySet(linebuffer, 0, 256); } else if (DecodeFormat[DecodeBufferWhichRead] == 1) // YUV { uint32 *in_ptr = (uint32 *)&DecodeBuffer[DecodeBufferWhichRead][RasterReadPos * 256 * 4]; if (Control & 0x2) // Endless scroll mode: { uint8 tmpss = HScroll; for (int x = 0; x < 256; x++) { linebuffer[x] = in_ptr[tmpss] | layer_or; tmpss++; } } else // Non-endless { uint16 tmpss = HScroll & 0x1FF; for (int x = 0; x < 256; x++) { linebuffer[x] = (tmpss < 256) ? (in_ptr[tmpss] | layer_or) : 0; tmpss = (tmpss + 1) & 0x1FF; } } MDFN_FastArraySet(in_ptr, 0, 256); } else if (DecodeFormat[DecodeBufferWhichRead] == 0) // Palette { uint8 *in_ptr = &DecodeBuffer[DecodeBufferWhichRead][RasterReadPos * 256]; if (Control & 0x2) // Endless scroll mode: { uint8 tmpss = HScroll; for (int x = 0; x < 256; x++) { linebuffer[x] = in_ptr[tmpss] ? (palette_ptr[in_ptr[tmpss]] | layer_or) : 0; tmpss++; } } else // Non-endless { uint16 tmpss = HScroll & 0x1FF; for (int x = 0; x < 256; x++) { linebuffer[x] = (tmpss < 256 && in_ptr[tmpss]) ? (palette_ptr[in_ptr[tmpss]] | layer_or) : 0; tmpss = (tmpss + 1) & 0x1FF; } } //MDFN_FastU32MemsetM8((uint32 *)in_ptr, 0, 256 / 4); } } RasterReadPos = (RasterReadPos + 1) & 0xF; if (!RasterReadPos) DecodeFormat[DecodeBufferWhichRead] = -1; // Invalidate this buffer. //printf("Fetch: %d, buffer: %d\n", RasterReadPos, DecodeBufferWhichRead); return (ret); } void RAINBOW_Reset(void) { Control = 0; NullRunY = NullRunU = NullRunV = 0; HScroll = 0; RasterReadPos = 0; DecodeBufferWhichRead = 0; memset(QuantTables, 0, sizeof(QuantTables)); memset(QuantTablesBase, 0, sizeof(QuantTablesBase)); DecodeFormat[0] = DecodeFormat[1] = -1; CalcHappyColor(); } }