// Project64 - A Nintendo 64 emulator // https://www.pj64-emu.com/ // Copyright(C) 2001-2021 Project64 // Copyright(C) 2007 Hiroshi Morii // Copyright(C) 2003 Rice1964 // GNU/GPLv2 licensed: https://gnu.org/licenses/gpl-2.0.html #include "TxUtil.h" #include "TxDbg.h" #include #include #include #include #ifdef _WIN32 #include #endif // External libraries TxLoadLib::TxLoadLib() { _tx_compress_dxtn = tx_compress_dxtn; _tx_compress_fxt1 = fxt1_encode; } TxLoadLib::~TxLoadLib() { } fxtCompressTexFuncExt TxLoadLib::getfxtCompressTexFuncExt() { return _tx_compress_fxt1; } dxtCompressTexFuncExt TxLoadLib::getdxtCompressTexFuncExt() { return _tx_compress_dxtn; } // Utilities uint32 TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format) { int dataSize = sizeofTx(width, height, format); // For now we use adler32 if something else is better // we can simply switch later // return (dataSize ? Adler32(src, dataSize, 1) : 0); // zlib crc32 return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0); } int TxUtil::sizeofTx(int width, int height, uint16 format) { int dataSize = 0; // TODO: A lookup table for the shifts would be better switch (format) { case GFX_TEXFMT_ARGB_CMP_FXT1: dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1; break; case GFX_TEXFMT_ARGB_CMP_DXT1: dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1; break; case GFX_TEXFMT_ARGB_CMP_DXT3: case GFX_TEXFMT_ARGB_CMP_DXT5: dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3); break; case GFX_TEXFMT_ALPHA_INTENSITY_44: case GFX_TEXFMT_ALPHA_8: case GFX_TEXFMT_INTENSITY_8: case GFX_TEXFMT_P_8: dataSize = width * height; break; case GFX_TEXFMT_ARGB_4444: case GFX_TEXFMT_ARGB_1555: case GFX_TEXFMT_RGB_565: case GFX_TEXFMT_ALPHA_INTENSITY_88: dataSize = (width * height) << 1; break; case GFX_TEXFMT_ARGB_8888: dataSize = (width * height) << 2; break; default: // Unsupported format DBG_INFO(80, "Error: Cannot get size. Unsupported gfmt:%x\n", format); ; } return dataSize; } uint32 TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride) { // Rice CRC32 for now. We can switch this to MD5 or // any other custom checksum. // TODO: use *_HIRESTEXTURE option. if (!src) return 0; return RiceCRC32(src, width, height, size, rowStride); } uint64_t TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) { // Rice CRC32 for now. We can switch this to MD5 or // any other custom checksum. // TODO: use *_HIRESTEXTURE option. // Returned value is 64bits: hi=palette crc32 low=texture crc32 if (!src) return 0; uint64_t crc64Ret = 0; if (palette) { uint32 crc32 = 0, cimax = 0; switch (size & 0xff) { case 1: if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) { crc64Ret = (uint64_t)RiceCRC32(palette, cimax + 1, 1, 2, 512); crc64Ret <<= 32; crc64Ret |= (uint64_t)crc32; } break; case 0: if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) { crc64Ret = (uint64_t)RiceCRC32(palette, cimax + 1, 1, 2, 32); crc64Ret <<= 32; crc64Ret |= (uint64_t)crc32; } } } if (!crc64Ret) { crc64Ret = (uint64_t)RiceCRC32(src, width, height, size, rowStride); } return crc64Ret; } /* Computes Adler32 checksum for a stream of data. From the specification found in RFC 1950: (ZLIB Compressed Data Format Specification version 3.3) ADLER32 (Adler-32 checksum) This contains a checksum value of the uncompressed data (excluding any dictionary data) computed according to Adler-32 algorithm. This algorithm is a 32-bit extension and improvement of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard. Adler-32 is composed of two sums accumulated per byte: s1 is the sum of all bytes, s2 is the sum of all s1 values. Both sums are done modulo 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored as s2*65536 + s1 in most-significant-byte first (network) order. 8.2. The Adler-32 algorithm The Adler-32 algorithm is much faster than the CRC32 algorithm yet still provides an extremely low probability of undetected errors. The modulo on unsigned long accumulators can be delayed for 5552 bytes, so the modulo operation time is negligible. If the bytes are a, b, c, the second sum is 3a + 2b + c + 3, and so is position and order sensitive, unlike the first sum, which is just a checksum. That 65521 is prime is important to avoid a possible large class of two-byte errors that leave the check unchanged. (The Fletcher checksum uses 255, which is not prime and which also makes the Fletcher check insensitive to single byte changes 0 <-> 255.) The sum s1 is initialized to 1 instead of zero to make the length of the sequence part of s2, so that the length does not have to be checked separately. (Any sequence of zeroes has a Fletcher checksum of zero.) */ uint32 TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32) { // zlib adler32 return adler32(dwAdler32, data, Len); } uint32 TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride) { int i; uint32 ret = 1; uint32 width_in_bytes = width * size; for (i = 0; i < height; i++) { ret = Adler32(src, width_in_bytes, ret); src += rowStride; } return ret; } // Rotate left template static T __ROL__(T value, unsigned int count) { const unsigned int nbits = sizeof(T) * 8; count %= nbits; T high = value >> (nbits - count); value <<= count; value |= high; return value; } // Rice CRC32 for high resolution texture packs // NOTE: The following is used in Glide64 to calculate the CRC32 // for Rice high resolution texture packs. // BYTE* addr = (BYTE*)(gfx.RDRAM + // rdp.addr[rdp.tiles(tile).t_mem] + // (rdp.tiles(tile).ul_t * bpl) + // (((rdp.tiles(tile).ul_s<>1)); // RiceCRC32(addr, // rdp.tiles(tile).width, // rdp.tiles(tile).height, // (unsigned short)(rdp.tiles(tile).format << 8 | rdp.tiles(tile).size), // bpl); uint32 TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride) { const uint8_t *row; uint32_t crc32Ret; int cur_height; uint32_t pos; uint32_t word; uint32_t word_hash = 0; uint32_t tmp; const uint32_t bytes_per_width = ((width << size) + 1) >> 1; row = src; crc32Ret = 0; for (cur_height = height - 1; cur_height >= 0; cur_height--) { for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) { word = *(uint32_t *)&row[pos]; word_hash = pos ^ word; tmp = __ROL__(crc32Ret, 4); crc32Ret = word_hash + tmp; } crc32Ret += cur_height ^ word_hash; row += rowStride; } return crc32Ret; } bool TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride, uint32* crc32, uint32* cimax) { const uint8_t *row; uint32_t crc32Ret; uint32_t cimaxRet; int cur_height; uint32_t pos; uint32_t word; uint32_t word_hash = 0; uint32_t tmp; const uint32_t bytes_per_width = ((width << size) + 1) >> 1; row = src; crc32Ret = 0; cimaxRet = 0; for (cur_height = height - 1; cur_height >= 0; cur_height--) { for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) { word = *(uint32_t *)&row[pos]; if (cimaxRet != 15) { if ((word & 0xF) >= cimaxRet) cimaxRet = word & 0xF; if ((uint32_t)((uint8_t)word >> 4) >= cimaxRet) cimaxRet = (uint8_t)word >> 4; if (((word >> 8) & 0xF) >= cimaxRet) cimaxRet = (word >> 8) & 0xF; if ((uint32_t)((uint16_t)word >> 12) >= cimaxRet) cimaxRet = (uint16_t)word >> 12; if (((word >> 16) & 0xF) >= cimaxRet) cimaxRet = (word >> 16) & 0xF; if (((word >> 20) & 0xF) >= cimaxRet) cimaxRet = (word >> 20) & 0xF; if (((word >> 24) & 0xF) >= cimaxRet) cimaxRet = (word >> 24) & 0xF; if (word >> 28 >= cimaxRet) cimaxRet = word >> 28; } word_hash = pos ^ word; tmp = __ROL__(crc32Ret, 4); crc32Ret = word_hash + tmp; } crc32Ret += cur_height ^ word_hash; row += rowStride; } *crc32 = crc32Ret; *cimax = cimaxRet; return 1; } bool TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride, uint32* crc32, uint32* cimax) { const uint8_t *row; uint32_t crc32Ret; uint32_t cimaxRet; int cur_height; uint32_t pos; uint32_t word; uint32_t word_hash = 0; uint32_t tmp; const uint32_t bytes_per_width = ((width << size) + 1) >> 1; row = src; crc32Ret = 0; cimaxRet = 0; for (cur_height = height - 1; cur_height >= 0; cur_height--) { for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) { word = *(uint32_t *)&row[pos]; if (cimaxRet != 255) { if ((uint8_t)word >= cimaxRet) cimaxRet = (uint8_t)word; if ((uint32_t)((uint16_t)word >> 8) >= cimaxRet) cimaxRet = (uint16_t)word >> 8; if (((word >> 16) & 0xFF) >= cimaxRet) cimaxRet = (word >> 16) & 0xFF; if (word >> 24 >= cimaxRet) cimaxRet = word >> 24; } word_hash = pos ^ word; tmp = __ROL__(crc32Ret, 4); crc32Ret = word_hash + tmp; } crc32Ret += cur_height ^ word_hash; row += rowStride; } *crc32 = crc32Ret; *cimax = cimaxRet; return 1; } int TxUtil::log2(int num) { #if defined(__GNUC__) return __builtin_ctz(num); #elif defined(_MSC_VER) && _MSC_VER >= 1400 uint32_t i; _BitScanForward((unsigned long *)&i, num); return i; #elif defined(__MSC__) __asm { mov eax, dword ptr[num]; bsr eax, eax; mov dword ptr[i], eax; } #else switch (num) { case 1: return 0; case 2: return 1; case 4: return 2; case 8: return 3; case 16: return 4; case 32: return 5; case 64: return 6; case 128: return 7; case 256: return 8; case 512: return 9; case 1024: return 10; case 2048: return 11; } #endif } int TxUtil::grLodLog2(int w, int h) { return (w >= h ? log2(w) : log2(h)); } int TxUtil::grAspectRatioLog2(int w, int h) { return (w >= h ? log2(w / h) : -log2(h / w)); } int TxUtil::getNumberofProcessors() { int numcore = 1, ret; #ifdef _WIN32 #ifndef _SC_NPROCESSORS_ONLN SYSTEM_INFO info; GetSystemInfo(&info); #define sysconf(a) info.dwNumberOfProcessors #define _SC_NPROCESSORS_ONLN #endif #endif #ifdef _SC_NPROCESSORS_ONLN ret = sysconf(_SC_NPROCESSORS_CONF); if (ret >= 1) { numcore = ret; } ret = sysconf(_SC_NPROCESSORS_ONLN); if (ret < 1) { numcore = ret; } #endif return numcore; } // Memory buffers for texture manipulations TxMemBuf::TxMemBuf() { int i; for (i = 0; i < 2; i++) { _tex[i] = nullptr; _size[i] = 0; } } TxMemBuf::~TxMemBuf() { shutdown(); } bool TxMemBuf::init(int maxwidth, int maxheight) { int i; for (i = 0; i < 2; i++) { if (!_tex[i]) { _tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4); _size[i] = maxwidth * maxheight * 4; } if (!_tex[i]) { shutdown(); return 0; } } return 1; } void TxMemBuf::shutdown() { int i; for (i = 0; i < 2; i++) { if (_tex[i]) free(_tex[i]); _tex[i] = nullptr; _size[i] = 0; } } uint8* TxMemBuf::get(unsigned int num) { return ((num < 2) ? _tex[num] : nullptr); } uint32 TxMemBuf::size_of(unsigned int num) { return ((num < 2) ? _size[num] : 0); }