project64/Source/GlideHQ/TxUtil.cpp

585 lines
15 KiB
C++

/*
* Texture Filtering
* Version: 1.0
*
* Copyright (C) 2007 Hiroshi Morii All Rights Reserved.
* Email koolsmoky(at)users.sourceforge.net
* Web http://www.3dfxzone.it/koolsmoky
*
* this is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* this is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "TxUtil.h"
#include "TxDbg.h"
#include <zlib/zlib.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include <Common/stdtypes.h>
/*
* External libraries
******************************************************************************/
TxLoadLib::TxLoadLib()
{
#ifndef DXTN_DLL
  /* No DLL build: bind the compressor entry points directly to the
   * tx_compress_dxtn / fxt1_encode symbols visible at compile time. */
  _tx_compress_dxtn = tx_compress_dxtn;
  _tx_compress_fxt1 = fxt1_encode;
#else
  /* DLL build: open the "dxtn" library unless a handle is already set. */
  if (!_dxtnlib) {
    _dxtnlib = LoadLibrary("dxtn");
  }

  /* Without a library handle there is nothing to resolve. */
  if (!_dxtnlib) {
    return;
  }

  /* Resolve each entry point only if it has not been resolved yet. */
  if (!_tx_compress_dxtn) {
    _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");
  }
  if (!_tx_compress_fxt1) {
    _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
  }
#endif
}
TxLoadLib::~TxLoadLib()
{
#ifdef DXTN_DLL
  /* Release the dynamic library handle if one is held. */
  if (_dxtnlib) {
    FreeLibrary(_dxtnlib);
  }
#endif
}
/* Accessor for the FXT1 compressor entry point stored by the constructor. */
fxtCompressTexFuncExt TxLoadLib::getfxtCompressTexFuncExt()
{
  fxtCompressTexFuncExt fxt1Encoder = _tx_compress_fxt1;
  return fxt1Encoder;
}
/* Accessor for the DXTn compressor entry point stored by the constructor. */
dxtCompressTexFuncExt TxLoadLib::getdxtCompressTexFuncExt()
{
  dxtCompressTexFuncExt dxtnEncoder = _tx_compress_dxtn;
  return dxtnEncoder;
}
/*
* Utilities
******************************************************************************/
/* Checksums a texture stored in one of the formats known to sizeofTx().
 *
 * The byte count comes from sizeofTx(width, height, format); when that
 * count is zero the result is 0, otherwise it is the zlib CRC-32 of the
 * first `dataSize` bytes at `src`.
 * (An Adler-32 based variant was used previously:
 *  Adler32(src, dataSize, 1).)
 */
uint32
TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
{
  const int byteCount = sizeofTx(width, height, format);
  if (!byteCount) {
    return 0;
  }

  /* crc32(0L, Z_NULL, 0) yields zlib's initial CRC value. */
  return crc32(crc32(0L, Z_NULL, 0), src, byteCount);
}
/* Returns the size in bytes of a width x height texture in the given
 * Glide texture format, or 0 (after logging) when the format is not
 * one of the cases handled below.
 */
int
TxUtil::sizeofTx(int width, int height, uint16 format)
{
  switch (format) {
  case GR_TEXFMT_ARGB_CMP_FXT1: {
    /* width rounded up to a multiple of 8, height to a multiple of 4,
     * then half a byte per texel */
    const int paddedW = (width + 0x7) & ~0x7;
    const int paddedH = (height + 0x3) & ~0x3;
    return (paddedW * paddedH) >> 1;
  }
  case GR_TEXFMT_ARGB_CMP_DXT1: {
    /* both dimensions rounded up to a multiple of 4, half a byte per texel */
    const int paddedW = (width + 0x3) & ~0x3;
    const int paddedH = (height + 0x3) & ~0x3;
    return (paddedW * paddedH) >> 1;
  }
  case GR_TEXFMT_ARGB_CMP_DXT3:
  case GR_TEXFMT_ARGB_CMP_DXT5: {
    /* both dimensions rounded up to a multiple of 4, one byte per texel */
    const int paddedW = (width + 0x3) & ~0x3;
    const int paddedH = (height + 0x3) & ~0x3;
    return paddedW * paddedH;
  }
  case GR_TEXFMT_ALPHA_INTENSITY_44:
  case GR_TEXFMT_ALPHA_8:
  case GR_TEXFMT_INTENSITY_8:
  case GR_TEXFMT_P_8:
    /* one byte per texel */
    return width * height;
  case GR_TEXFMT_ARGB_4444:
  case GR_TEXFMT_ARGB_1555:
  case GR_TEXFMT_RGB_565:
  case GR_TEXFMT_ALPHA_INTENSITY_88:
    /* two bytes per texel */
    return (width * height) << 1;
  case GR_TEXFMT_ARGB_8888:
    /* four bytes per texel */
    return (width * height) << 2;
  default:
    /* unsupported format */
    DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
    break;
  }
  return 0;
}
#if 0 /* unused */
/* Classifies the alpha channel of an image of width * height 32-bit texels.
 * This block is compiled out by the surrounding #if 0 and is written with
 * MSVC-style x86 inline assembly.
 */
uint32
TxUtil::chkAlpha(uint32* src, int width, int height)
{
/* NOTE: src must be ARGB8888
* return values
* 0x00000000: 8bit alpha
* 0x00000001: 1bit alpha
* 0xff000001: no alpha
*/
/* How the loop below produces those values:
 * - each texel is masked with 0xff000000 to isolate its top byte;
 * - a texel whose top byte is neither 0x00 nor 0xff jumps straight to
 *   `done`, leaving `alpha` at its initial 0;
 * - otherwise the masked value is ANDed into ebx (which starts at
 *   0xff000000), so ebx stays 0xff000000 only if every top byte was 0xff;
 * - after the last texel, bit 0 is set and ebx is stored into `alpha`.
 * NOTE(review): a _size of 0 is not guarded; the loop body runs before
 * the counter is tested.
 */
int _size = width * height;
uint32 alpha = 0;
__asm {
mov esi, dword ptr [src];
mov ecx, dword ptr [_size];
mov ebx, 0xff000000;
tc1_loop:
mov eax, dword ptr [esi];
add esi, 4;
and eax, 0xff000000;
jz alpha1bit;
cmp eax, 0xff000000;
je alpha1bit;
jmp done;
alpha1bit:
and ebx, eax;
dec ecx;
jnz tc1_loop;
or ebx, 0x00000001;
mov dword ptr [alpha], ebx;
done:
}
return alpha;
}
#endif
/* 32-bit checksum of a texture region.
 *
 * Currently delegates to RiceCRC32; this could be switched to Jabo MD5
 * or any other custom checksum.
 * TODO: use *_HIRESTEXTURE option.
 *
 * Returns 0 when src is NULL.
 */
uint32
TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
{
  return src ? RiceCRC32(src, width, height, size, rowStride) : 0;
}
/* 64-bit checksum of a texture region and, optionally, its palette.
 *
 * Currently built on the Rice CRC32 routines; this could be switched to
 * Jabo MD5 or any other custom checksum.
 * TODO: use *_HIRESTEXTURE option.
 *
 * Result layout: high 32 bits = palette CRC32, low 32 bits = texture CRC32.
 * The palette half is only produced when `palette` is non-NULL and
 * (size & 0xff) is 1 (hashed via RiceCRC32_CI8) or 0 (hashed via
 * RiceCRC32_CI4). If the combined value is zero, or no palette hash was
 * made, the plain RiceCRC32 of the texture is returned instead.
 * Returns 0 when src is NULL.
 */
uint64
TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
{
  if (!src) return 0;

  uint64 combined = 0;

  if (palette) {
    uint32 texCrc = 0, maxIndex = 0;
    int paletteStride = 0; /* stays 0 when no indexed hash was computed */
    const int texelSize = size & 0xff;

    if (texelSize == 1) {
      if (RiceCRC32_CI8(src, width, height, size, rowStride, &texCrc, &maxIndex))
        paletteStride = 512;
    } else if (texelSize == 0) {
      if (RiceCRC32_CI4(src, width, height, size, rowStride, &texCrc, &maxIndex))
        paletteStride = 32;
    }

    if (paletteStride != 0) {
      /* hash palette entries 0..maxIndex as a single row */
      combined = (uint64)RiceCRC32(palette, maxIndex + 1, 1, 2, paletteStride);
      combined <<= 32;
      combined |= (uint64)texCrc;
    }
  }

  if (!combined) {
    combined = (uint64)RiceCRC32(src, width, height, size, rowStride);
  }

  return combined;
}
/*
** Computes Adler32 checksum for a stream of data.
**
** From the specification found in RFC 1950: (ZLIB Compressed Data Format
** Specification version 3.3)
**
** ADLER32 (Adler-32 checksum) This contains a checksum value of the
** uncompressed data (excluding any dictionary data) computed according to
** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
**
** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
** as s2*65536 + s1 in most-significant-byte first (network) order.
**
** 8.2. The Adler-32 algorithm
**
** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
** provides an extremely low probability of undetected errors.
**
** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
** so the modulo operation time is negligible. If the bytes are a, b, c,
** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
** unlike the first sum, which is just a checksum. That 65521 is prime is
** important to avoid a possible large class of two-byte errors that leave
** the check unchanged. (The Fletcher checksum uses 255, which is not prime
** and which also makes the Fletcher check insensitive to single byte
** changes 0 <-> 255.)
**
** The sum s1 is initialized to 1 instead of zero to make the length of
** the sequence part of s2, so that the length does not have to be checked
** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
*/
/* Continues an Adler-32 checksum over `Len` bytes at `data`, starting from
 * the running value `dwAdler32`, and returns the updated checksum.
 * The active branch hands the work to zlib; the disabled branch is a
 * stand-alone implementation of the same recurrence.
 */
uint32
TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
{
#if 1
  /* zlib adler32 */
  return adler32(dwAdler32, data, Len);
#else
  uint32 sumLow = dwAdler32 & 0xFFFF;
  uint32 sumHigh = (dwAdler32 >> 16) & 0xFFFF;

  while (Len > 0) {
    /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1,
     * so the modulo can be deferred for that many bytes */
    int chunk = (Len < 5552 ? Len : 5552);
    Len -= chunk;

    for (; chunk > 0; --chunk) {
      sumLow += *data++;
      sumHigh += sumLow;
    }

    /* 65521 is the largest prime smaller than 65536 */
    sumLow %= 65521;
    sumHigh %= 65521;
  }

  return (sumHigh << 16) | sumLow;
#endif
}
/* Adler-32 over a 2D region: `height` rows, each contributing
 * width * size bytes, with consecutive rows `rowStride` bytes apart.
 * The checksum starts at 1 and is carried from one row to the next.
 */
uint32
TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
{
  const uint32 rowBytes = width * size;
  const uint8* rowPtr = src;
  uint32 running = 1;

  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft) {
    running = Adler32(rowPtr, rowBytes, running);
    rowPtr += rowStride;
  }

  return running;
}
// Rotate `value` left by `count` bits.
//
// `count` is reduced modulo the bit width of T, so any count is accepted.
// A reduced count of zero returns `value` unchanged; without that early
// return the code would evaluate `value >> nbits`, and shifting by the full
// width of the (promoted) operand is undefined behaviour.
//
// T is expected to be an unsigned integer type: the right shift must be a
// logical shift for the wrapped-around bits to be correct.
template<class T> static T __ROL__(T value, unsigned int count)
{
  const unsigned int nbits = sizeof(T) * 8;
  count %= nbits;
  if (count == 0)
    return value;
  T high = value >> (nbits - count); // bits that wrap around to the bottom
  value <<= count;
  value |= high;
  return value;
}
/* Rice CRC32 for hires texture packs */
/* NOTE: The following is used in Glide64 to calculate the CRC32
* for Rice hires texture packs.
*
* BYTE* addr = (BYTE*)(gfx.RDRAM +
* rdp.addr[rdp.tiles[tile].t_mem] +
* (rdp.tiles[tile].ul_t * bpl) +
* (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
* RiceCRC32(addr,
* rdp.tiles[tile].width,
* rdp.tiles[tile].height,
* (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
* bpl);
*/
/* Rice-compatible checksum of a texture region.
 *
 * src       - first byte of the first row
 * width     - row width in texels
 * height    - number of rows
 * size      - used directly as the shift count in
 *             ((width << size) + 1) >> 1, which gives the bytes per row
 * rowStride - byte distance between consecutive rows
 *
 * Each row is consumed as 32-bit words in host byte order, starting at
 * byte offset (bytes_per_width - 4) and stepping down by 4 until the
 * offset would go negative. Consequences visible in the loop bounds:
 * a row shorter than 4 bytes contributes no words, and when the row
 * length is not a multiple of 4 the leading remainder bytes are skipped.
 * These quirks are kept on purpose so the value stays the same as before.
 *
 * The words are loaded with memcpy rather than by dereferencing a cast
 * pointer: `row + pos` is not guaranteed to be 4-byte aligned, and the
 * cast also discarded const.
 */
uint32
TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
{
  const uint8_t *row = src;
  uint32_t crc32Ret = 0;
  uint32_t word_hash = 0; /* carried across rows when a row yields no words */
  const uint32_t bytes_per_width = ((width << size) + 1) >> 1;

  for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
    /* pos is unsigned: "pos < 0x80000000u" is the "still non-negative" test */
    for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
      uint32_t word;
      memcpy(&word, &row[pos], sizeof(word));
      word_hash = pos ^ word;
      crc32Ret = word_hash + __ROL__(crc32Ret, 4);
    }
    crc32Ret += cur_height ^ word_hash;
    row += rowStride;
  }
  return crc32Ret;
}
/* Rice-compatible checksum of a 4-bit indexed texture region that also
 * reports the largest index encountered.
 *
 * src, width, height, size, rowStride - as for RiceCRC32: rows are
 *             ((width << size) + 1) >> 1 bytes long, rowStride bytes apart
 * crc32     - out: checksum (same recurrence as RiceCRC32)
 * cimax     - out: largest 4-bit value found in the words that were hashed
 *
 * Always returns 1.
 *
 * Rows are walked as 32-bit host-order words from the end of the row
 * towards its start; see the loop bounds for which bytes are covered.
 * Words are loaded with memcpy because `row + pos` is not guaranteed to be
 * 4-byte aligned and the previous pointer cast discarded const.
 */
boolean
TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
                      uint32* crc32, uint32* cimax)
{
  const uint8_t *row = src;
  uint32_t crc32Ret = 0;
  uint32_t cimaxRet = 0;
  uint32_t word_hash = 0; /* carried across rows when a row yields no words */
  const uint32_t bytes_per_width = ((width << size) + 1) >> 1;

  for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
    /* pos is unsigned: "pos < 0x80000000u" is the "still non-negative" test */
    for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
      uint32_t word;
      memcpy(&word, &row[pos], sizeof(word));

      /* 15 is the largest possible nibble, so stop scanning once reached */
      if (cimaxRet != 15) {
        for (unsigned int shift = 0; shift < 32; shift += 4) {
          const uint32_t index = (word >> shift) & 0xF;
          if (index > cimaxRet)
            cimaxRet = index;
        }
      }

      word_hash = pos ^ word;
      crc32Ret = word_hash + __ROL__(crc32Ret, 4);
    }
    crc32Ret += cur_height ^ word_hash;
    row += rowStride;
  }

  *crc32 = crc32Ret;
  *cimax = cimaxRet;
  return 1;
}
/* Rice-compatible checksum of an 8-bit indexed texture region that also
 * reports the largest index encountered.
 *
 * src, width, height, size, rowStride - as for RiceCRC32: rows are
 *             ((width << size) + 1) >> 1 bytes long, rowStride bytes apart
 * crc32     - out: checksum (same recurrence as RiceCRC32)
 * cimax     - out: largest byte value found in the words that were hashed
 *
 * Always returns 1.
 *
 * Rows are walked as 32-bit host-order words from the end of the row
 * towards its start; see the loop bounds for which bytes are covered.
 * Words are loaded with memcpy because `row + pos` is not guaranteed to be
 * 4-byte aligned and the previous pointer cast discarded const.
 */
boolean
TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
                      uint32* crc32, uint32* cimax)
{
  const uint8_t *row = src;
  uint32_t crc32Ret = 0;
  uint32_t cimaxRet = 0;
  uint32_t word_hash = 0; /* carried across rows when a row yields no words */
  const uint32_t bytes_per_width = ((width << size) + 1) >> 1;

  for (int cur_height = height - 1; cur_height >= 0; cur_height--) {
    /* pos is unsigned: "pos < 0x80000000u" is the "still non-negative" test */
    for (uint32_t pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {
      uint32_t word;
      memcpy(&word, &row[pos], sizeof(word));

      /* 255 is the largest possible byte, so stop scanning once reached */
      if (cimaxRet != 255) {
        for (unsigned int shift = 0; shift < 32; shift += 8) {
          const uint32_t index = (word >> shift) & 0xFF;
          if (index > cimaxRet)
            cimaxRet = index;
        }
      }

      word_hash = pos ^ word;
      crc32Ret = word_hash + __ROL__(crc32Ret, 4);
    }
    crc32Ret += cur_height ^ word_hash;
    row += rowStride;
  }

  *crc32 = crc32Ret;
  *cimax = cimaxRet;
  return 1;
}
/* Base-2 logarithm of a power-of-two value.
 *
 * The GCC, MSVC and portable branches return the index of the lowest set
 * bit, which equals log2(num) when num is a power of two. The legacy
 * __MSC__ inline-assembly branch uses bsr (highest set bit) instead; the
 * two agree for powers of two.
 * NOTE(review): callers are presumed to pass powers of two only - confirm.
 *
 * Returns 0 for num == 0, for which the bit-scan operations are undefined.
 */
int
TxUtil::log2(int num)
{
  if (num == 0)
    return 0;
#if defined(__GNUC__)
  return __builtin_ctz(num);
#elif defined(_MSC_VER) && _MSC_VER >= 1400
  DWORD i = 0;
  _BitScanForward(&i, (DWORD)num);
  return (int)i;
#elif defined(__MSC__)
  int i;
  __asm {
    mov eax, dword ptr [num];
    bsr eax, eax;
    mov dword ptr [i], eax;
  }
  return i;
#else
  /* Portable trailing-zero count; terminates because num != 0 here. */
  unsigned int bits = (unsigned int)num;
  int shift = 0;
  while ((bits & 1u) == 0) {
    bits >>= 1;
    ++shift;
  }
  return shift;
#endif
}
/* Returns log2() of the larger of the two dimensions
 * (of w when the two are equal). */
int
TxUtil::grLodLog2(int w, int h)
{
  if (w >= h) {
    return log2(w);
  }
  return log2(h);
}
/* Returns log2 of the width:height ratio: log2(w/h) when w >= h,
 * otherwise -log2(h/w). The ratio uses integer division.
 *
 * Returns 0 when either dimension is zero; the division below would
 * otherwise divide by zero.
 */
int
TxUtil::grAspectRatioLog2(int w, int h)
{
  if (w == 0 || h == 0)
    return 0;
  return (w >= h ? log2(w/h) : -log2(h/w));
}
/* Returns the number of processors reported by the platform, never less
 * than 1.
 *
 * - Windows without _SC_NPROCESSORS_ONLN: SYSTEM_INFO::dwNumberOfProcessors.
 * - Platforms defining _SC_NPROCESSORS_ONLN: the configured count from
 *   sysconf(_SC_NPROCESSORS_CONF), replaced by the online count from
 *   sysconf(_SC_NPROCESSORS_ONLN) when that query succeeds.
 * - Anything else: 1.
 *
 * The second sysconf result used to be applied only when it was invalid
 * (< 1), which let the function return 0 or a negative value; a result is
 * now applied only when it is valid. The Windows path also no longer
 * defines function-local macros named sysconf / _SC_NPROCESSORS_ONLN that
 * stayed defined for the rest of the translation unit.
 */
int
TxUtil::getNumberofProcessors()
{
  int numcore = 1;
#if defined(_WIN32) && !defined(_SC_NPROCESSORS_ONLN)
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  const int count = (int)info.dwNumberOfProcessors;
  if (count >= 1) {
    numcore = count;
  }
#elif defined(_SC_NPROCESSORS_ONLN)
  int ret = (int)sysconf(_SC_NPROCESSORS_CONF);
  if (ret >= 1) {
    numcore = ret;
  }
  ret = (int)sysconf(_SC_NPROCESSORS_ONLN);
  if (ret >= 1) {
    numcore = ret;
  }
#endif
  return numcore;
}
/*
* Memory buffers for texture manipulations
******************************************************************************/
/* Starts with both buffer slots empty: no memory, zero size. */
TxMemBuf::TxMemBuf()
{
  _tex[0] = NULL;
  _size[0] = 0;

  _tex[1] = NULL;
  _size[1] = 0;
}
/* Releases both buffers through shutdown(). */
TxMemBuf::~TxMemBuf()
{
  this->shutdown();
}
/* Allocates the two work buffers, each maxwidth * maxheight * 4 bytes.
 *
 * A slot that already holds a buffer is left untouched (it is not resized).
 * Returns 1 when both slots hold a buffer afterwards. On any failure both
 * slots are released via shutdown() and 0 is returned.
 *
 * The byte count is computed in 64 bits and rejected when a dimension is
 * negative or the total does not fit in a positive int; the previous int
 * multiplication could overflow, and two negative dimensions could produce
 * a bogus positive size.
 */
boolean
TxMemBuf::init(int maxwidth, int maxheight)
{
  const bool dimsValid = (maxwidth >= 0 && maxheight >= 0);
  uint64_t bytes = 0;
  if (dimsValid)
    bytes = (uint64_t)maxwidth * (uint64_t)maxheight * 4;
  const bool sizeValid = dimsValid && bytes <= 0x7fffffffu;

  for (int i = 0; i < 2; i++) {
    if (!_tex[i] && sizeValid) {
      _tex[i] = (uint8 *)malloc((size_t)bytes);
      if (_tex[i])
        _size[i] = (int)bytes;
    }
    if (!_tex[i]) {
      shutdown();
      return 0;
    }
  }
  return 1;
}
/* Frees both buffers and resets each slot to "empty" (NULL, size 0).
 * Safe to call on slots that are already empty. */
void
TxMemBuf::shutdown()
{
  for (int slot = 0; slot < 2; ++slot) {
    /* free(NULL) is a no-op, so no separate NULL test is needed */
    free(_tex[slot]);
    _tex[slot] = NULL;
    _size[slot] = 0;
  }
}
/* Returns the buffer held in slot `num` (0 or 1), or NULL for any other
 * slot number. */
uint8*
TxMemBuf::get(unsigned int num)
{
  if (num >= 2) {
    return NULL;
  }
  return _tex[num];
}
/* Returns the recorded size of slot `num` (0 or 1), or 0 for any other
 * slot number. */
uint32
TxMemBuf::size_of(unsigned int num)
{
  if (num >= 2) {
    return 0;
  }
  return _size[num];
}