diff --git a/Source/GlideHQ/Ext_TxFilter.cpp b/Source/GlideHQ/Ext_TxFilter.cpp new file mode 100644 index 000000000..f5dcc7543 --- /dev/null +++ b/Source/GlideHQ/Ext_TxFilter.cpp @@ -0,0 +1,27 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include "Ext_TxFilter.h" + diff --git a/Source/GlideHQ/Ext_TxFilter.h b/Source/GlideHQ/Ext_TxFilter.h new file mode 100644 index 000000000..54cb8b68b --- /dev/null +++ b/Source/GlideHQ/Ext_TxFilter.h @@ -0,0 +1,212 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __EXT_TXFILTER_H__ +#define __EXT_TXFILTER_H__ + +#ifdef WIN32 +#include +#define TXHMODULE HMODULE +#define DLOPEN(a) LoadLibraryW(a) +#define DLCLOSE(a) FreeLibrary(a) +#define DLSYM(a, b) GetProcAddress(a, b) +#define GETCWD(a, b) GetCurrentDirectoryW(a, b) +#define CHDIR(a) SetCurrentDirectoryW(a) +#else +#include +#include +#define MAX_PATH 4095 +#define TXHMODULE void* +#define DLOPEN(a) dlopen(a, RTLD_LAZY|RTLD_GLOBAL) +#define DLCLOSE(a) dlclose(a) +#define DLSYM(a, b) dlsym(a, b) +#define GETCWD(a, b) getcwd(b, a) +#define CHDIR(a) chdir(a) +#endif + +#ifdef WIN32 +typedef __int64 int64; +typedef unsigned __int64 uint64; +typedef unsigned char boolean; +#else +typedef long long int64; +typedef unsigned long long uint64; +typedef unsigned char boolean; +#endif + +#define NO_OPTIONS 0x00000000 + +#define FILTER_MASK 0x000000ff +#define NO_FILTER 0x00000000 +#define SMOOTH_FILTER_MASK 0x0000000f +#define NO_SMOOTH_FILTER 0x00000000 +#define SMOOTH_FILTER_1 0x00000001 +#define SMOOTH_FILTER_2 0x00000002 +#define SMOOTH_FILTER_3 0x00000003 +#define SMOOTH_FILTER_4 0x00000004 +#define SHARP_FILTER_MASK 0x000000f0 +#define NO_SHARP_FILTER 0x00000000 +#define SHARP_FILTER_1 0x00000010 +#define SHARP_FILTER_2 0x00000020 + +#define ENHANCEMENT_MASK 0x00000f00 +#define NO_ENHANCEMENT 0x00000000 +#define X2_ENHANCEMENT 0x00000100 +#define X2SAI_ENHANCEMENT 0x00000200 +#define HQ2X_ENHANCEMENT 0x00000300 +#define LQ2X_ENHANCEMENT 0x00000400 +#define HQ4X_ENHANCEMENT 0x00000500 +#define HQ2XS_ENHANCEMENT 0x00000600 +#define LQ2XS_ENHANCEMENT 0x00000700 + +#define COMPRESSION_MASK 0x0000f000 +#define NO_COMPRESSION 0x00000000 +#define FXT1_COMPRESSION 0x00001000 +#define NCC_COMPRESSION 
0x00002000 +#define S3TC_COMPRESSION 0x00003000 + +#define HIRESTEXTURES_MASK 0x000f0000 +#define NO_HIRESTEXTURES 0x00000000 +#define GHQ_HIRESTEXTURES 0x00010000 +#define RICE_HIRESTEXTURES 0x00020000 +#define JABO_HIRESTEXTURES 0x00030000 + +#define COMPRESS_TEX 0x00100000 +#define COMPRESS_HIRESTEX 0x00200000 +#define GZ_TEXCACHE 0x00400000 +#define GZ_HIRESTEXCACHE 0x00800000 +#define DUMP_TEXCACHE 0x01000000 +#define DUMP_HIRESTEXCACHE 0x02000000 +#define TILE_HIRESTEX 0x04000000 +#define UNDEFINED_0 0x08000000 +#define FORCE16BPP_HIRESTEX 0x10000000 +#define FORCE16BPP_TEX 0x20000000 +#define LET_TEXARTISTS_FLY 0x40000000 /* a little freedom for texture artists */ +#define DUMP_TEX 0x80000000 + +#ifndef __GLIDE_H__ /* GLIDE3 */ +/* from 3Dfx Interactive Inc. glide.h */ +#define GR_TEXFMT_ALPHA_8 0x2 +#define GR_TEXFMT_INTENSITY_8 0x3 + +#define GR_TEXFMT_ALPHA_INTENSITY_44 0x4 +#define GR_TEXFMT_P_8 0x5 + +#define GR_TEXFMT_RGB_565 0xa +#define GR_TEXFMT_ARGB_1555 0xb +#define GR_TEXFMT_ARGB_4444 0xc +#define GR_TEXFMT_ALPHA_INTENSITY_88 0xd + +/* from 3Dfx Interactive Inc. g3ext.h */ +#define GR_TEXFMT_ARGB_CMP_FXT1 0x11 + +#define GR_TEXFMT_ARGB_8888 0x12 + +#define GR_TEXFMT_ARGB_CMP_DXT1 0x16 +#define GR_TEXFMT_ARGB_CMP_DXT3 0x18 +#define GR_TEXFMT_ARGB_CMP_DXT5 0x1A +#endif /* GLIDE3 */ + +struct GHQTexInfo { + unsigned char *data; + int width; + int height; + unsigned short format; + + int smallLodLog2; + int largeLodLog2; + int aspectRatioLog2; + + int tiles; + int untiled_width; + int untiled_height; + + unsigned char is_hires_tex; +}; + +/* Callback to display hires texture info. + * Gonetz + * Example implementation; the format string is wide-character, matching dispInfoFuncExt: + * void DispInfo(const wchar_t *format, ...) 
+ * { + * va_list args; + * wchar_t buf[INFO_BUF]; + * + * va_start(args, format); + * vswprintf(buf, INFO_BUF, format, args); + * va_end(args); + * + * fputws(buf, stdout); + * } + */ +#define INFO_BUF 4095 +typedef void (*dispInfoFuncExt)(const wchar_t *format, ...); + +#ifndef TXFILTER_DLL +boolean ext_ghq_init(int maxwidth, /* maximum texture width supported by hardware */ + int maxheight,/* maximum texture height supported by hardware */ + int maxbpp, /* maximum texture bpp supported by hardware */ + int options, /* options */ + int cachesize,/* cache textures to system memory */ + wchar_t *path, /* plugin directory. must be shorter than MAX_PATH */ + wchar_t *ident, /* name of ROM. must be no longer than 64 characters. */ + dispInfoFuncExt callback /* callback function to display info */ + ); + +void ext_ghq_shutdown(void); + +boolean ext_ghq_txfilter(unsigned char *src, /* input texture */ + int srcwidth, /* width of input texture */ + int srcheight, /* height of input texture */ + unsigned short srcformat, /* format of input texture */ + uint64 g64crc, /* glide64 crc */ + GHQTexInfo *info /* output */ + ); + +boolean ext_ghq_hirestex(uint64 g64crc, /* glide64 crc */ + uint64 r_crc64, /* checksum hi:palette low:texture */ + unsigned short *palette, /* palette for CI textures */ + GHQTexInfo *info /* output */ + ); + +uint64 ext_ghq_checksum(unsigned char *src, /* input texture */ + int width, /* width of texture */ + int height, /* height of texture */ + int size, /* type of texture pixel */ + int rowStride, /* row stride in bytes */ + unsigned char *palette /* palette */ + ); + +boolean ext_ghq_dmptx(unsigned char *src, /* input texture (must be in 3Dfx Glide format) */ + int width, /* width of texture */ + int height, /* height of texture */ + int rowStridePixel, /* row stride of input texture in pixels */ + unsigned short gfmt, /* glide format of input texture */ + unsigned short n64fmt,/* N64 format hi:format low:size */ + uint64 r_crc64 /* checksum hi:palette low:texture 
*/ + ); + +boolean ext_ghq_reloadhirestex(); +#endif /* TXFILTER_DLL */ + +#endif /* __EXT_TXFILTER_H__ */ diff --git a/Source/GlideHQ/GlideHQ.vcproj b/Source/GlideHQ/GlideHQ.vcproj new file mode 100644 index 000000000..4e5d40162 --- /dev/null +++ b/Source/GlideHQ/GlideHQ.vcproj @@ -0,0 +1,295 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Source/GlideHQ/README.txt b/Source/GlideHQ/README.txt new file mode 100644 index 000000000..c903e5736 --- /dev/null +++ b/Source/GlideHQ/README.txt @@ -0,0 +1,94 @@ +/* + * GlideHQ (Texture enhancer library for Glide64) + * Version: 1.5 + * + * Copyright (C) 2007 Hiroshi Morii aka KoolSmoky All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +About: +This is a realtime texture enhancer library with hi-resolution texture +pack support for Glide64 (http://glide64.emuxhaven.net). Traditional and +non-traditional techniques have been used to achieve speed and high image +quality even on a 9 year old 3Dfx Voodoo2. 
+ +Although the 3Dfx Glide3x texture format naming conventions are used, the +library can be expanded for generic use. + +Supported: +OS: 32bit Linux and MS Windows +Enhancers: Hq4x, Hq2x, Hq2xS, Lq2x, Lq2xS, Super2xSai, x2 +Filters: Smooth (1,2,3,4), Sharp (1,2) +Compressors: FXT1, S3TC +Input formats: GR_TEXFMT_ALPHA_8, + GR_TEXFMT_RGB_565, + GR_TEXFMT_ARGB_1555, + GR_TEXFMT_ARGB_4444, + GR_TEXFMT_ARGB_8888, + GR_TEXFMT_ALPHA_INTENSITY_44, + GR_TEXFMT_ALPHA_INTENSITY_88 +Output formats: Same as input unless compression or hires packs are used. +Hires texture packs: Rice format (Jabo and GlideHQ format coming later) + +Acknowledgments: +I hope you enjoy GlideHQ (texture enhancer library for Glide64). Greatest +thanks to Gonetz for making this happen despite his busy schedule. We've rushed +everything to share the eye-candy with all of you N64 emulation fans. I +would also like to thank a great friend of mine, Daniel Borca, for providing +the texture compression code, Maxim Stepin (hq2x, hq4x) and Derek Liauw Kie Fa +(2xSaI) for the filtering engines, Rice for his N64 graphics plugin source +code, and Mudlord for the hq2xS and lq2xS code. GlideHQ also uses the boost C++ +libraries, zlib general purpose compression library, and the Portable Network +Graphics library. Thanks to all the developers for making them available. And +special thanks to the Glide64 beta testing crew. Without their feedback, +this library would not have seen daylight. Thank you all. + +The source code for GlideHQ is released in hopes that it will be improved. +I know the code is not up to par after so many late-night caffeine boosts. +If you have suggestions or modifications, please feel free to post them on +the Glide64 forum at emuxhaven. + +Porting the library to other platforms should not be too hard. The code was +written with cross-platform compatibility in mind and will build with GCC and +GNU make. Currently supported are 32bit Linux and MS Windows.
+ +If you are looking for driver updates for your 3Dfx Interactive Inc. gfx +card, grab them from the forums at http://www.3dfxzone.it/enboard/ +Unbelievable as it seems, drivers are still being updated 6 years +after 3Dfx's demise. + +I know N64 rules, anyone up for PSX? :)) + +-KoolSmoky + +References: +[1] R.W. Floyd & L. Steinberg, An adaptive algorithm for spatial grey scale, + Proceedings of the Society of Information Display 17, pp75-77, 1976 +[2] Ken Turkowski, Filters for Common Resampling Tasks, Apple Computer 1990 + http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf +[3] Don P. Mitchell and Arun N. Netravali, Reconstruction Filters in Computer + Graphics, SIGGRAPH '88, Proceedings of the 15th annual conference on + Computer graphics and interactive techniques, pp221-228, 1988 +[4] J. F. Kaiser and W. A. Reed, Data smoothing using low-pass digital + filters, Rev. Sci. Instrum. 48 (11), pp1447-1457, 1977 +[5] Maxim Stepin, hq4x Magnification Filter, http://www.hiend3d.com/hq4x.html +[6] Derek Liauw Kie Fa, 2xSaI, http://elektron.its.tudelft.nl/~dalikifa +[7] Dirk Stevens, Eagle engine, http://www.retrofx.com/rfxtech.html +[8] 3DFX_texture_compression_FXT1 and EXT_texture_compression_s3tc extension + specs from the OpenGL Extension Registry. http://oss.sgi.com/projects/ + ogl-sample/registry/ diff --git a/Source/GlideHQ/TextureFilters.cpp b/Source/GlideHQ/TextureFilters.cpp new file mode 100644 index 000000000..e0e2d9b43 --- /dev/null +++ b/Source/GlideHQ/TextureFilters.cpp @@ -0,0 +1,715 @@ +/* +Copyright (C) 2003 Rice1964 + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +*/ + +/* Copyright (C) 2007 Hiroshi Morii + * Modified for the Texture Filtering library + */ + +#include +#include "TextureFilters.h" + +/************************************************************************/ +/* 2X filters */ +/************************************************************************/ + +#define DWORD_MAKE(r, g, b, a) ((uint32) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))) +#define WORD_MAKE(r, g, b, a) ((uint16) (((a) << 12) | ((r) << 8) | ((g) << 4) | (b))) + +// Basic 2x R8G8B8A8 filter with interpolation + +void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint32 *pDst1, *pDst2; + uint32 *pSrc, *pSrc2; + uint32 nWidth = width; + uint32 nHeight = height; + + uint32 b1; + uint32 g1; + uint32 r1; + uint32 a1; + uint32 b2; + uint32 g2; + uint32 r2; + uint32 a2; + uint32 b3; + uint32 g3; + uint32 r3; + uint32 a3; + uint32 b4; + uint32 g4; + uint32 r4; + uint32 a4; + + uint32 xSrc; + uint32 ySrc; + + for (ySrc = 0; ySrc < nHeight; ySrc++) + { + pSrc = (uint32*)(((uint8*)srcPtr)+ySrc*srcPitch); + pSrc2 = (uint32*)(((uint8*)srcPtr)+(ySrc+1)*srcPitch); + pDst1 = (uint32*)(((uint8*)dstPtr)+(ySrc*2)*dstPitch); + pDst2 = (uint32*)(((uint8*)dstPtr)+(ySrc*2+1)*dstPitch); + + for (xSrc = 0; xSrc < nWidth; xSrc++) + { + b1 = (pSrc[xSrc]>>0)&0xFF; + g1 = (pSrc[xSrc]>>8)&0xFF; + r1 = (pSrc[xSrc]>>16)&0xFF; + a1 = (pSrc[xSrc]>>24)&0xFF; + + if( xSrc>0)&0xFF; + g2 = (pSrc[xSrc+1]>>8)&0xFF; + r2 = (pSrc[xSrc+1]>>16)&0xFF; + a2 = (pSrc[xSrc+1]>>24)&0xFF; + } + + 
if( ySrc>0)&0xFF; + g3 = (pSrc2[xSrc]>>8)&0xFF; + r3 = (pSrc2[xSrc]>>16)&0xFF; + a3 = (pSrc2[xSrc]>>24)&0xFF; + if( xSrc>0)&0xFF; + g4 = (pSrc2[xSrc+1]>>8)&0xFF; + r4 = (pSrc2[xSrc+1]>>16)&0xFF; + a4 = (pSrc2[xSrc+1]>>24)&0xFF; + } + } + + + // Pixel 1 + pDst1[xSrc*2] = pSrc[xSrc]; + + // Pixel 2 + if( xSrc> 0)&0xF; + g1 = (pSrc[xSrc]>> 4)&0xF; + r1 = (pSrc[xSrc]>> 8)&0xF; + a1 = (pSrc[xSrc]>>12)&0xF; + + if( xSrc> 0)&0xF; + g2 = (pSrc[xSrc+1]>> 4)&0xF; + r2 = (pSrc[xSrc+1]>> 8)&0xF; + a2 = (pSrc[xSrc+1]>>12)&0xF; + } + + if( ySrc> 0)&0xF; + g3 = (pSrc2[xSrc]>> 4)&0xF; + r3 = (pSrc2[xSrc]>> 8)&0xF; + a3 = (pSrc2[xSrc]>>12)&0xF; + if( xSrc> 0)&0xF; + g4 = (pSrc2[xSrc+1]>> 4)&0xF; + r4 = (pSrc2[xSrc+1]>> 8)&0xF; + a4 = (pSrc2[xSrc+1]>>12)&0xF; + } + } + + // Pixel 1 + pDst1[xSrc*2] = pSrc[xSrc]; + + // Pixel 2 + if( xSrc + */ +void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) +{ + // NOTE: for now we get away with copying the boundaries + // filter the boundaries if we face problems + + uint32 mul1, mul2, mul3, shift4; + + uint32 x,y,z; + uint32 *_src1, *_src2, *_src3, *_dest; + uint32 val[4]; + uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9; + + switch( filter ) + { + case SHARP_FILTER_2: + mul1=1; + mul2=8; + mul3=12; + shift4=2; + break; + case SHARP_FILTER_1: + default: + mul1=1; + mul2=8; + mul3=16; + shift4=3; + break; + } + + // setup rows + _src1 = src; + _src2 = _src1 + srcwidth; + _src3 = _src2 + srcwidth; + _dest = dest; + + // copy the first row + memcpy(_dest, _src1, (srcwidth << 2)); + _dest += srcwidth; + // filter 2nd row to 1 row before the last + for (y = 1; y < srcheight-1; y++) { + // copy the first pixel + _dest[0] = *_src2; + // filter 2nd pixel to 1 pixel before last + for (x = 1; x < srcwidth-1; x++) { + for (z=0; z<4; z++) { + t1 = *((uint8*)(_src1+x-1)+z); + t2 = *((uint8*)(_src1+x )+z); + t3 = *((uint8*)(_src1+x+1)+z); + t4 = *((uint8*)(_src2+x-1)+z); + t5 = *((uint8*)(_src2+x )+z); + t6 = 
*((uint8*)(_src2+x+1)+z); + t7 = *((uint8*)(_src3+x-1)+z); + t8 = *((uint8*)(_src3+x )+z); + t9 = *((uint8*)(_src3+x+1)+z); + + if( (t5*mul2) > (t1+t3+t7+t9+t2+t4+t6+t8)*mul1 ) { + val[z]= ((t5*mul3) - (t1+t3+t7+t9+t2+t4+t6+t8)*mul1)>>shift4; + if (val[z] > 0xFF) val[z] = 0xFF; + } else { + val[z] = t5; + } + } + _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24); + } + // copy the ending pixel + _dest[srcwidth-1] = *(_src3 - 1); + // next row + _src1 += srcwidth; + _src2 += srcwidth; + _src3 += srcwidth; + _dest += srcwidth; + } + // copy the last row + memcpy(_dest, _src2, (srcwidth << 2)); +} + +#if !_16BPP_HACK +void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter) +{ + // NOTE: for now we get away with copying the boundaries + // filter the boundaries if we face problems + + uint16 mul1, mul2, mul3, shift4; + + uint32 x,y,z; + uint16 *_src1, *_src2, *_src3, *_dest; + uint16 val[4]; + uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9; + + switch( filter ) { + case SHARP_FILTER_2: + mul1=1; + mul2=8; + mul3=12; + shift4=2; + break; + case SHARP_FILTER_1: + default: + mul1=1; + mul2=8; + mul3=16; + shift4=3; + break; + } + + // setup rows + _src1 = src; + _src2 = _src1 + srcwidth; + _src3 = _src2 + srcwidth; + _dest = dest; + + // copy the first row + memcpy(_dest, _src1, (srcwidth << 1)); + _dest += srcwidth; + // filter 2nd row to 1 row before the last + for( y = 1; y < srcheight - 1; y++) { + // copy the first pixel + _dest[0] = *_src2; + // filter 2nd pixel to 1 pixel before last + for( x = 1; x < srcwidth - 1; x++) { + for( z = 0; z < 4; z++ ) { + /* Hiroshi Morii + * Read the entire 16bit pixel and then extract the A,R,G,B components. 
+ */ + uint32 shift = z << 2; + t1 = ((*((uint16*)(_src1+x-1))) >> shift) & 0xF; + t2 = ((*((uint16*)(_src1+x ))) >> shift) & 0xF; + t3 = ((*((uint16*)(_src1+x+1))) >> shift) & 0xF; + t4 = ((*((uint16*)(_src2+x-1))) >> shift) & 0xF; + t5 = ((*((uint16*)(_src2+x ))) >> shift) & 0xF; + t6 = ((*((uint16*)(_src2+x+1))) >> shift) & 0xF; + t7 = ((*((uint16*)(_src3+x-1))) >> shift) & 0xF; + t8 = ((*((uint16*)(_src3+x ))) >> shift) & 0xF; + t9 = ((*((uint16*)(_src3+x+1))) >> shift) & 0xF; + + if( (t5*mul2) > (t1+t3+t7+t9+t2+t4+t6+t8)*mul1 ) { + val[z] = ((t5*mul3) - (t1+t3+t7+t9+t2+t4+t6+t8)*mul1)>>shift4; + if (val[z] > 0xF) val[z] = 0xF; + } else { + val[z] = t5; + } + } + _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12); + } + // copy the ending pixel + _dest[srcwidth-1] = *(_src3 - 1); + // next row + _src1 += srcwidth; + _src2 += srcwidth; + _src3 += srcwidth; + _dest += srcwidth; + } + // copy the last row + memcpy(_dest, _src2, (srcwidth << 1)); +} +#endif /* !_16BPP_HACK */ + +/* + * Smooth filters + * Hiroshi Morii + */ +void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) +{ + // NOTE: for now we get away with copying the boundaries + // filter the boundaries if we face problems + + uint32 mul1, mul2, mul3, shift4; + + uint32 x,y,z; + uint32 *_src1, *_src2, *_src3, *_dest; + uint32 val[4]; + uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9; + + switch( filter ) { + case SMOOTH_FILTER_4: + mul1=1; + mul2=2; + mul3=4; + shift4=4; + break; + case SMOOTH_FILTER_3: + mul1=1; + mul2=1; + mul3=8; + shift4=4; + break; + case SMOOTH_FILTER_2: + mul1=1; + mul2=1; + mul3=2; + shift4=2; + break; + case SMOOTH_FILTER_1: + default: + mul1=1; + mul2=1; + mul3=6; + shift4=3; + break; + } + + switch (filter) { + case SMOOTH_FILTER_3: + case SMOOTH_FILTER_4: + // setup rows + _src1 = src; + _src2 = _src1 + srcwidth; + _src3 = _src2 + srcwidth; + _dest = dest; + // copy the first row + memcpy(_dest, _src1, (srcwidth << 2)); + _dest += srcwidth; 
+ // filter 2nd row to 1 row before the last + for (y = 1; y < srcheight - 1; y++){ + // copy the first pixel + _dest[0] = _src2[0]; + // filter 2nd pixel to 1 pixel before last + for (x = 1; x < srcwidth - 1; x++) { + for (z = 0; z < 4; z++ ) { + t1 = *((uint8*)(_src1+x-1)+z); + t2 = *((uint8*)(_src1+x )+z); + t3 = *((uint8*)(_src1+x+1)+z); + t4 = *((uint8*)(_src2+x-1)+z); + t5 = *((uint8*)(_src2+x )+z); + t6 = *((uint8*)(_src2+x+1)+z); + t7 = *((uint8*)(_src3+x-1)+z); + t8 = *((uint8*)(_src3+x )+z); + t9 = *((uint8*)(_src3+x+1)+z); + /* the component value must not overflow 0xFF */ + val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4; + if (val[z] > 0xFF) val[z] = 0xFF; + } + _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24); + } + // copy the ending pixel + _dest[srcwidth-1] = *(_src3 - 1); + // next row + _src1 += srcwidth; + _src2 += srcwidth; + _src3 += srcwidth; + _dest += srcwidth; + } + // copy the last row + memcpy(_dest, _src2, (srcwidth << 2)); + break; + case SMOOTH_FILTER_1: + case SMOOTH_FILTER_2: + default: + // setup rows + _src1 = src; + _src2 = _src1 + srcwidth; + _src3 = _src2 + srcwidth; + _dest = dest; + // copy the first row + memcpy(_dest, _src1, (srcwidth << 2)); + _dest += srcwidth; + // filter 2nd row to 1 row before the last + for (y = 1; y < srcheight - 1; y++) { + // filter 1st pixel to the last + if (y & 1) { + for( x = 0; x < srcwidth; x++) { + for( z = 0; z < 4; z++ ) { + t2 = *((uint8*)(_src1+x )+z); + t5 = *((uint8*)(_src2+x )+z); + t8 = *((uint8*)(_src3+x )+z); + /* the component value must not overflow 0xFF */ + val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4; + if (val[z] > 0xFF) val[z] = 0xFF; + } + _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24); + } + } else { + memcpy(_dest, _src2, (srcwidth << 2)); + } + // next row + _src1 += srcwidth; + _src2 += srcwidth; + _src3 += srcwidth; + _dest += srcwidth; + } + // copy the last row + memcpy(_dest, _src2, (srcwidth << 2)); + break; + } +} + +#if 
!_16BPP_HACK +void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter) +{ + // NOTE: for now we get away with copying the boundaries + // filter the boundaries if we face problems + + uint16 mul1, mul2, mul3, shift4; + + uint32 x,y,z; + uint16 *_src1, *_src2, *_src3, *_dest; + uint16 val[4]; + uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9; + + switch( filter ) { + case SMOOTH_FILTER_4: + mul1=1; + mul2=2; + mul3=4; + shift4=4; + break; + case SMOOTH_FILTER_3: + mul1=1; + mul2=1; + mul3=8; + shift4=4; + break; + case SMOOTH_FILTER_2: + mul1=1; + mul2=1; + mul3=2; + shift4=2; + break; + case SMOOTH_FILTER_1: + default: + mul1=1; + mul2=1; + mul3=6; + shift4=3; + break; + } + + switch (filter) { + case SMOOTH_FILTER_3: + case SMOOTH_FILTER_4: + // setup rows + _src1 = src; + _src2 = _src1 + srcwidth; + _src3 = _src2 + srcwidth; + _dest = dest; + // copy the first row + memcpy(_dest, _src1, (srcwidth << 1)); + _dest += srcwidth; + // filter 2nd row to 1 row before the last + for (y = 1; y < srcheight - 1; y++) { + // copy the first pixel + _dest[0] = *_src2; + // filter 2nd pixel to 1 pixel before last + for (x = 1; x < srcwidth - 1; x++) { + for (z = 0; z < 4; z++ ) { + /* Read the entire 16bit pixel and then extract the A,R,G,B components. 
*/ + uint32 shift = z << 2; + t1 = ((*(uint16*)(_src1+x-1)) >> shift) & 0xF; + t2 = ((*(uint16*)(_src1+x )) >> shift) & 0xF; + t3 = ((*(uint16*)(_src1+x+1)) >> shift) & 0xF; + t4 = ((*(uint16*)(_src2+x-1)) >> shift) & 0xF; + t5 = ((*(uint16*)(_src2+x )) >> shift) & 0xF; + t6 = ((*(uint16*)(_src2+x+1)) >> shift) & 0xF; + t7 = ((*(uint16*)(_src3+x-1)) >> shift) & 0xF; + t8 = ((*(uint16*)(_src3+x )) >> shift) & 0xF; + t9 = ((*(uint16*)(_src3+x+1)) >> shift) & 0xF; + /* the component value must not overflow 0xF */ + val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4; + if (val[z] > 0xF) val[z] = 0xF; + } + _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12); + } + // copy the ending pixel + _dest[srcwidth-1] = *(_src3 - 1); + // next row + _src1 += srcwidth; + _src2 += srcwidth; + _src3 += srcwidth; + _dest += srcwidth; + } + // copy the last row + memcpy(_dest, _src2, (srcwidth << 1)); + break; + case SMOOTH_FILTER_1: + case SMOOTH_FILTER_2: + default: + // setup rows + _src1 = src; + _src2 = _src1 + srcwidth; + _src3 = _src2 + srcwidth; + _dest = dest; + // copy the first row + memcpy(_dest, _src1, (srcwidth << 1)); + _dest += srcwidth; + // filter 2nd row to 1 row before the last + for( y = 1; y < srcheight - 1; y++) { + if (y & 1) { + for( x = 0; x < srcwidth; x++) { + for( z = 0; z < 4; z++ ) { + /* Read the entire 16bit pixel and then extract the A,R,G,B components. 
*/ + uint32 shift = z << 2; + t2 = ((*(uint16*)(_src1+x)) >> shift) & 0xF; + t5 = ((*(uint16*)(_src2+x)) >> shift) & 0xF; + t8 = ((*(uint16*)(_src3+x)) >> shift) & 0xF; + /* the component value must not overflow 0xF */ + val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4; + if (val[z] > 0xF) val[z] = 0xF; + } + _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12); + } + } else { + memcpy(_dest, _src2, (srcwidth << 1)); + } + // next row + _src1 += srcwidth; + _src2 += srcwidth; + _src3 += srcwidth; + _dest += srcwidth; + } + // copy the last row + memcpy(_dest, _src2, (srcwidth << 1)); + break; + } +} +#endif /* !_16BPP_HACK */ + +void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) { + switch (filter & ENHANCEMENT_MASK) { + case HQ4X_ENHANCEMENT: + hq4x_8888((uint8*)src, (uint8*)dest, srcwidth, srcheight, srcwidth, (srcwidth << 4)); + return; + case HQ2X_ENHANCEMENT: + hq2x_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight); + return; + case HQ2XS_ENHANCEMENT: + hq2xS_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight); + return; + case LQ2X_ENHANCEMENT: + lq2x_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight); + return; + case LQ2XS_ENHANCEMENT: + lq2xS_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight); + return; + case X2SAI_ENHANCEMENT: + Super2xSaI_8888((uint32*)src, (uint32*)dest, srcwidth, srcheight, srcwidth); + return; + case X2_ENHANCEMENT: + Texture2x_32((uint8*)src, (srcwidth << 2), (uint8*)dest, (srcwidth << 3), srcwidth, srcheight); + return; + } + + switch (filter & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) { + case SMOOTH_FILTER_1: + case SMOOTH_FILTER_2: + case SMOOTH_FILTER_3: + case SMOOTH_FILTER_4: + SmoothFilter_8888((uint32*)src, srcwidth, srcheight, (uint32*)dest, (filter & SMOOTH_FILTER_MASK)); + return; + case SHARP_FILTER_1: + case SHARP_FILTER_2: + 
SharpFilter_8888((uint32*)src, srcwidth, srcheight, (uint32*)dest, (filter & SHARP_FILTER_MASK)); + return; + } +} diff --git a/Source/GlideHQ/TextureFilters.h b/Source/GlideHQ/TextureFilters.h new file mode 100644 index 000000000..7830eac5f --- /dev/null +++ b/Source/GlideHQ/TextureFilters.h @@ -0,0 +1,81 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#ifndef __TEXTUREFILTERS_H__ +#define __TEXTUREFILTERS_H__ + +/* 16bpp filters are somewhat buggy and output image is not clean. + * Since there's not much time, we'll just convert them to ARGB8888 + * and use 32bpp filters until fixed. 
+ * (1:enable hack, 0:disable hack) */ +#define _16BPP_HACK 1 + +#include "TxInternal.h" + +/* enhancers */ +void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL); + +void hq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); +void hq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); + +void lq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); +void lq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); + +void Super2xSaI_8888(uint32 *srcPtr, uint32 *destPtr, uint32 width, uint32 height, uint32 pitch); + +void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); + +/* filters */ +void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter); + +void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter); + +/* helper */ +void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter); + +#if !_16BPP_HACK +void hq4x_init(void); +void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL); +void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL); +void hq4x_565 (unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL); + +void hq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); +void hq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); + +void lq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); +void lq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); + +void Super2xSaI_4444(uint16 *srcPtr, uint16 *destPtr, uint32 width, 
uint32 height, uint32 pitch); +void Super2xSaI_1555(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch); +void Super2xSaI_565 (uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch); +void Super2xSaI_8 (uint8 *srcPtr, uint8 *destPtr, uint32 width, uint32 height, uint32 pitch); + +void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); + +void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter); + +void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter); +#endif + +#endif /* __TEXTUREFILTERS_H__ */ diff --git a/Source/GlideHQ/TextureFilters_2xsai.cpp b/Source/GlideHQ/TextureFilters_2xsai.cpp new file mode 100644 index 000000000..38226df28 --- /dev/null +++ b/Source/GlideHQ/TextureFilters_2xsai.cpp @@ -0,0 +1,155 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +/* Based on Derek Liauw Kie Fa and Rice1964 Super2xSaI code */ + +#include "TextureFilters.h" + +/* Tie-break vote used by the included kernel: (A differs from C or D) minus + * (B differs from C or D), giving -1, 0 or 1. */ +#define GET_RESULT(A, B, C, D) ((((A) != (C)) || ((A) != (D))) - (((B) != (C)) || ((B) != (D)))) + +/* Super2xSaI for 32-bit pixels with four 8-bit channels. + * The scaling loop is TextureFilters_2xsai.h, textually included into the body; + * it uses the parameters, the locals declared here and the two SAI_ macros. */ +void Super2xSaI_8888(uint32 *srcPtr, uint32 *destPtr, uint32 width, uint32 height, uint32 pitch) +{ +/* per-channel average of two pixels; the last term restores a low bit set in both */ +#define SAI_INTERPOLATE_8888(A, B) ((((A) & 0xFEFEFEFE) >> 1) + (((B) & 0xFEFEFEFE) >> 1) + ((A) & (B) & 0x01010101)) +/* per-channel average of four pixels; the continuation line adds the carry of the low two bits */ +#define SAI_Q_INTERPOLATE_8888(A, B, C, D) ((((A) & 0xFCFCFCFC) >> 2) + (((B) & 0xFCFCFCFC) >> 2) + (((C) & 0xFCFCFCFC) >> 2) + (((D) & 0xFCFCFCFC) >> 2) \ + + (((((A) & 0x03030303) + ((B) & 0x03030303) + ((C) & 0x03030303) + ((D) & 0x03030303)) >> 2) & 0x03030303)) + +#define SAI_INTERPOLATE SAI_INTERPOLATE_8888 +#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_8888 + + /* offset of the second output row, read by the included kernel */ + uint32 destWidth = width << 1; + + uint32 color4, color5, color6; + uint32 color1, color2, color3; + uint32 colorA0, colorA1, colorA2, colorA3; + uint32 colorB0, colorB1, colorB2, colorB3; + uint32 colorS1, colorS2; + uint32 product1a, product1b, product2a, product2b; + +#include "TextureFilters_2xsai.h" + +#undef SAI_INTERPOLATE +#undef SAI_Q_INTERPOLATE +} + +#if !_16BPP_HACK +/* Super2xSaI for 16-bit 4444 pixels; same structure as Super2xSaI_8888. */ +void Super2xSaI_4444(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch) +{ +#define SAI_INTERPOLATE_4444(A, B) ((((A) & 0xEEEE) >> 1) + (((B) & 0xEEEE) >> 1) + ((A) & (B) & 0x1111)) +#define SAI_Q_INTERPOLATE_4444(A, B, C, D) ((((A) & 0xCCCC) >> 2) + (((B) & 0xCCCC) >> 2) + (((C) & 0xCCCC) >> 2) + (((D) & 0xCCCC) >> 2) \ + + (((((A) & 0x3333) + ((B) & 0x3333) + ((C) & 0x3333) + ((D) & 0x3333)) >> 2) & 0x3333)) + +#define SAI_INTERPOLATE SAI_INTERPOLATE_4444 +#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_4444 + + /* offset of the second output row, read by the included kernel */ + uint32 destWidth = width << 1; + + uint16 color4, color5, color6; + uint16 color1, color2, color3; + uint16 colorA0, colorA1, colorA2, colorA3; + uint16 colorB0, colorB1, colorB2, colorB3; + uint16 colorS1, colorS2; + uint16 product1a, product1b, product2a, product2b; + +#include 
"TextureFilters_2xsai.h" + +#undef SAI_INTERPOLATE +#undef SAI_Q_INTERPOLATE +} + +/* Super2xSaI for 16-bit 1555 pixels; same structure as Super2xSaI_8888. */ +void Super2xSaI_1555(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch) +{ +#define SAI_INTERPOLATE_1555(A, B) ((((A) & 0x7BDE) >> 1) + (((B) & 0x7BDE) >> 1) + ((A) & (B) & 0x8421)) +#define SAI_Q_INTERPOLATE_1555(A, B, C, D) ((((A) & 0x739C) >> 2) + (((B) & 0x739C) >> 2) + (((C) & 0x739C) >> 2) + (((D) & 0x739C) >> 2) \ + + (((((A) & 0x8C63) + ((B) & 0x8C63) + ((C) & 0x8C63) + ((D) & 0x8C63)) >> 2) & 0x8C63)) + +#define SAI_INTERPOLATE SAI_INTERPOLATE_1555 +#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_1555 + + /* offset of the second output row, read by the included kernel */ + uint32 destWidth = width << 1; + + uint16 color4, color5, color6; + uint16 color1, color2, color3; + uint16 colorA0, colorA1, colorA2, colorA3; + uint16 colorB0, colorB1, colorB2, colorB3; + uint16 colorS1, colorS2; + uint16 product1a, product1b, product2a, product2b; + +#include "TextureFilters_2xsai.h" + +#undef SAI_INTERPOLATE +#undef SAI_Q_INTERPOLATE +} + +/* Super2xSaI for 16-bit 565 pixels; same structure as Super2xSaI_8888. */ +void Super2xSaI_565(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch) +{ +#define SAI_INTERPOLATE_565(A, B) ((((A) & 0xF7DE) >> 1) + (((B) & 0xF7DE) >> 1) + ((A) & (B) & 0x0821)) +#define SAI_Q_INTERPOLATE_565(A, B, C, D) ((((A) & 0xE79C) >> 2) + (((B) & 0xE79C) >> 2) + (((C) & 0xE79C) >> 2) + (((D) & 0xE79C) >> 2) \ + + (((((A) & 0x1863) + ((B) & 0x1863) + ((C) & 0x1863) + ((D) & 0x1863)) >> 2) & 0x1863)) + +#define SAI_INTERPOLATE SAI_INTERPOLATE_565 +#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_565 + + /* offset of the second output row, read by the included kernel */ + uint32 destWidth = width << 1; + + uint16 color4, color5, color6; + uint16 color1, color2, color3; + uint16 colorA0, colorA1, colorA2, colorA3; + uint16 colorB0, colorB1, colorB2, colorB3; + uint16 colorS1, colorS2; + uint16 product1a, product1b, product2a, product2b; + +#include "TextureFilters_2xsai.h" + +#undef SAI_INTERPOLATE +#undef SAI_Q_INTERPOLATE +} + +/* Super2xSaI for 8-bit pixels; same structure as Super2xSaI_8888. */ +void Super2xSaI_8(uint8 *srcPtr, uint8 *destPtr, uint32 width, uint32 height, uint32 pitch) +{ 
+#define SAI_INTERPOLATE_8(A, B) ((((A) & 0xFE) >> 1) + (((B) & 0xFE) >> 1) + ((A) & (B) & 0x01)) +#define SAI_Q_INTERPOLATE_8(A, B, C, D) ((((A) & 0xFC) >> 2) + (((B) & 0xFC) >> 2) + (((C) & 0xFC) >> 2) + (((D) & 0xFC) >> 2) \ + + (((((A) & 0x03) + ((B) & 0x03) + ((C) & 0x03) + ((D) & 0x03)) >> 2) & 0x03)) + +#define SAI_INTERPOLATE SAI_INTERPOLATE_8 +#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_8 + + /* offset of the second output row, read by the included kernel */ + uint32 destWidth = width << 1; + + uint8 color4, color5, color6; + uint8 color1, color2, color3; + uint8 colorA0, colorA1, colorA2, colorA3; + uint8 colorB0, colorB1, colorB2, colorB3; + uint8 colorS1, colorS2; + uint8 product1a, product1b, product2a, product2b; + +#include "TextureFilters_2xsai.h" + +#undef SAI_INTERPOLATE +#undef SAI_Q_INTERPOLATE +} +#endif /* !_16BPP_HACK */ diff --git a/Source/GlideHQ/TextureFilters_2xsai.h b/Source/GlideHQ/TextureFilters_2xsai.h new file mode 100644 index 000000000..f6696ae06 --- /dev/null +++ b/Source/GlideHQ/TextureFilters_2xsai.h @@ -0,0 +1,145 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +/* Based on Derek Liauw Kie Fa and Rice1964 Super2xSaI code */ + +/* Kernel body shared by the Super2xSaI functions: it is #included inside the + * function body and uses srcPtr, destPtr, width, height, pitch, destWidth, the + * color and product locals, and the SAI_INTERPOLATE, SAI_Q_INTERPOLATE and + * GET_RESULT macros. Each source pixel (color5) produces a 2x2 output block. + * NOTE(review): neighbour rows are addressed with plus/minus width and the + * second output row with destWidth, while the per-row advance uses pitch; these + * agree only when pitch == width -- confirm against callers. */ + + int row0, row1, row2, row3; + int col0, col1, col2, col3; + + /* same type as width/height so the loop conditions cannot wrap around */ + uint32 x; + uint32 y; + + for (y = 0; y < height; y++) { + /* interior rows get offsets to the neighbouring rows; the first and last row use 0 for all four */ + if ((y > 0) && (y < height - 1)) { + row0 = width; + row0 = -row0; + row1 = 0; + row2 = width; + row3 = (y == height - 2 ? width : width << 1); + } else { + row0 = 0; + row1 = 0; + row2 = 0; + row3 = 0; + } + + for (x = 0; x < width; x++) { +//--------------------------------------- B0 B1 B2 B3 +// 4 5 6 S2 +// 1 2 3 S1 +// A0 A1 A2 A3 + /* interior columns get offsets to the neighbouring columns; the first and last column use 0 for all four */ + if ((x > 0) && (x < width - 1)) { + col0 = -1; + col1 = 0; + col2 = 1; + col3 = (x == width - 2 ? 1 : 2); + } else { + col0 = 0; + col1 = 0; + col2 = 0; + col3 = 0; + } + + colorB0 = *(srcPtr + col0 + row0); + colorB1 = *(srcPtr + col1 + row0); + colorB2 = *(srcPtr + col2 + row0); + colorB3 = *(srcPtr + col3 + row0); + + color4 = *(srcPtr + col0 + row1); + color5 = *(srcPtr + col1 + row1); + color6 = *(srcPtr + col2 + row1); + colorS2 = *(srcPtr + col3 + row1); + + color1 = *(srcPtr + col0 + row2); + color2 = *(srcPtr + col1 + row2); + color3 = *(srcPtr + col2 + row2); + colorS1 = *(srcPtr + col3 + row2); + + colorA0 = *(srcPtr + col0 + row3); + colorA1 = *(srcPtr + col1 + row3); + colorA2 = *(srcPtr + col2 + row3); + colorA3 = *(srcPtr + col3 + row3); + +//-------------------------------------- + if (color2 == color6 && color5 != color3) + product2b = product1b = color2; + else if (color5 == color3 && color2 != color6) + product2b = product1b = color5; + else if (color5 == color3 && color2 == color6) { + int r = 0; + + r += GET_RESULT(color6, color5, color1, colorA1); + r += GET_RESULT(color6, color5, color4, colorB1); + r += GET_RESULT(color6, color5, colorA2, colorS1); + r += GET_RESULT(color6, color5, colorB2, colorS2); + + if (r > 0) + product2b = product1b = color6; + else if (r < 0) + product2b = product1b = color5; + else + product2b = product1b = SAI_INTERPOLATE(color5, color6); + + } else { + + if (color6 == color3 && color3 == 
colorA1 && color2 != colorA2 && color3 != colorA0) + product2b = SAI_Q_INTERPOLATE(color3, color3, color3, color2); + else if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) + product2b = SAI_Q_INTERPOLATE(color2, color2, color2, color3); + else + product2b = SAI_INTERPOLATE(color2, color3); + + if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) + product1b = SAI_Q_INTERPOLATE(color6, color6, color6, color5); + else if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) + product1b = SAI_Q_INTERPOLATE(color6, color5, color5, color5); + else + product1b = SAI_INTERPOLATE(color5, color6); + } + + if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) + product2a = SAI_INTERPOLATE(color2, color5); + else if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) + product2a = SAI_INTERPOLATE(color2, color5); + else + product2a = color2; + + if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) + product1a = SAI_INTERPOLATE(color2, color5); + else if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) + product1a = SAI_INTERPOLATE(color2, color5); + else + product1a = color5; + + + /* write the 2x2 output block: top row at destPtr, bottom row destWidth further on */ + destPtr[0] = product1a; + destPtr[1] = product1b; + destPtr[destWidth] = product2a; + destPtr[destWidth + 1] = product2b; + + srcPtr++; + destPtr += 2; + } + /* skip the source row padding; destPtr has already moved 2*width, so this brings its total advance to 4*pitch */ + srcPtr += (pitch-width); + destPtr += (((pitch-width)<<1)+(pitch<<1)); + } diff --git a/Source/GlideHQ/TextureFilters_hq2x.cpp b/Source/GlideHQ/TextureFilters_hq2x.cpp new file mode 100644 index 000000000..33cb99530 --- /dev/null +++ b/Source/GlideHQ/TextureFilters_hq2x.cpp @@ -0,0 +1,1510 @@ +/* +Copyright (C) 2003 Rice1964 + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or 
(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +*/ + +/* Copyright (C) 2007 Hiroshi Morii + * Modified for the Texture Filtering library + */ + +/* 2007 Mudlord - Added hq2xS lq2xS filters */ + +#include "TextureFilters.h" + +/************************************************************************/ +/* hq2x filters */ +/************************************************************************/ + +/***************************************************************************/ +/* Basic types */ + +/***************************************************************************/ +/* interpolation */ + +//static unsigned interp_bits_per_pixel; + +#if !_16BPP_HACK +#define INTERP_16_MASK_1_3(v) ((v)&0x0F0F) +#define INTERP_16_MASK_SHIFT_2_4(v) (((v)&0xF0F0)>>4) +#define INTERP_16_MASK_SHIFTBACK_2_4(v) ((INTERP_16_MASK_1_3(v))<<4) + +static uint16 hq2x_interp_16_521(uint16 p1, uint16 p2, uint16 p3) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*5 + INTERP_16_MASK_1_3(p2)*2 + INTERP_16_MASK_1_3(p3)*1) / 8) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*5 + INTERP_16_MASK_SHIFT_2_4(p2)*2 + INTERP_16_MASK_SHIFT_2_4(p3)*1) / 8); +} + +static uint16 hq2x_interp_16_332(uint16 p1, uint16 p2, uint16 p3) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*3 + INTERP_16_MASK_1_3(p2)*3 + INTERP_16_MASK_1_3(p3)*2) / 8) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*3 + INTERP_16_MASK_SHIFT_2_4(p2)*3 + INTERP_16_MASK_SHIFT_2_4(p3)*2) / 8); +} + +static uint16 hq2x_interp_16_611(uint16 p1, uint16 p2, uint16 p3) +{ + 
return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*6 + INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 8) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*6 + INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 8); +} + +static uint16 hq2x_interp_16_71(uint16 p1, uint16 p2) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*7 + INTERP_16_MASK_1_3(p2)) / 8) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*7 + INTERP_16_MASK_SHIFT_2_4(p2)) / 8); +} + +static uint16 hq2x_interp_16_211(uint16 p1, uint16 p2, uint16 p3) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*2 + INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 4) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*2 + INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 4); +} + +static uint16 hq2x_interp_16_772(uint16 p1, uint16 p2, uint16 p3) +{ + return INTERP_16_MASK_1_3(((INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2))*7 + INTERP_16_MASK_1_3(p3)*2) / 16) + | INTERP_16_MASK_SHIFTBACK_2_4(((INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2))*7 + INTERP_16_MASK_SHIFT_2_4(p3)*2) / 16); +} + +static uint16 hq2x_interp_16_11(uint16 p1, uint16 p2) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2)) / 2) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2)) / 2); +} + +static uint16 hq2x_interp_16_31(uint16 p1, uint16 p2) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*3 + INTERP_16_MASK_1_3(p2)) / 4) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*3 + INTERP_16_MASK_SHIFT_2_4(p2)) / 4); +} + +static uint16 hq2x_interp_16_1411(uint16 p1, uint16 p2, uint16 p3) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*14 + INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 16) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*14 + INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 16); +} + +static uint16 
hq2x_interp_16_431(uint16 p1, uint16 p2, uint16 p3) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*4 + INTERP_16_MASK_1_3(p2)*3 + INTERP_16_MASK_1_3(p3)) / 8) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*4 + INTERP_16_MASK_SHIFT_2_4(p2)*3 + INTERP_16_MASK_SHIFT_2_4(p3)) / 8); +} + +static uint16 hq2x_interp_16_53(uint16 p1, uint16 p2) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*5 + INTERP_16_MASK_1_3(p2)*3) / 8) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*5 + INTERP_16_MASK_SHIFT_2_4(p2)*3) / 8); +} + +static uint16 hq2x_interp_16_151(uint16 p1, uint16 p2) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*15 + INTERP_16_MASK_1_3(p2)) / 16) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*15 + INTERP_16_MASK_SHIFT_2_4(p2)) / 16); +} + +static uint16 hq2x_interp_16_97(uint16 p1, uint16 p2) +{ + return INTERP_16_MASK_1_3((INTERP_16_MASK_1_3(p1)*9 + INTERP_16_MASK_1_3(p2)*7) / 16) + | INTERP_16_MASK_SHIFTBACK_2_4((INTERP_16_MASK_SHIFT_2_4(p1)*9 + INTERP_16_MASK_SHIFT_2_4(p2)*7) / 16); +} +#endif /* !_16BPP_HACK */ + +#define INTERP_32_MASK_1_3(v) ((v)&0x00FF00FF) +#define INTERP_32_MASK_SHIFT_2_4(v) (((v)&0xFF00FF00)>>8) +#define INTERP_32_MASK_SHIFTBACK_2_4(v) (((INTERP_32_MASK_1_3(v))<<8)) + +static uint32 hq2x_interp_32_521(uint32 p1, uint32 p2, uint32 p3) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*5 + INTERP_32_MASK_1_3(p2)*2 + INTERP_32_MASK_1_3(p3)*1) / 8) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*5 + INTERP_32_MASK_SHIFT_2_4(p2)*2 + INTERP_32_MASK_SHIFT_2_4(p3)*1) / 8); +} + +static uint32 hq2x_interp_32_332(uint32 p1, uint32 p2, uint32 p3) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*3 + INTERP_32_MASK_1_3(p2)*3 + INTERP_32_MASK_1_3(p3)*2) / 8) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*3 + INTERP_32_MASK_SHIFT_2_4(p2)*3 + INTERP_32_MASK_SHIFT_2_4(p3)*2) / 8); +} + +static uint32 hq2x_interp_32_211(uint32 p1, uint32 p2, 
uint32 p3) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*2 + INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 4) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*2 + INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 4); +} + +static uint32 hq2x_interp_32_611(uint32 p1, uint32 p2, uint32 p3) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*6 + INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 8) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*6 + INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 8); +} + +static uint32 hq2x_interp_32_71(uint32 p1, uint32 p2) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*7 + INTERP_32_MASK_1_3(p2)) / 8) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*7 + INTERP_32_MASK_SHIFT_2_4(p2)) / 8); +} + +static uint32 hq2x_interp_32_772(uint32 p1, uint32 p2, uint32 p3) +{ + return INTERP_32_MASK_1_3(((INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2))*7 + INTERP_32_MASK_1_3(p3)*2) / 16) + | INTERP_32_MASK_SHIFTBACK_2_4(((INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2))*7 + INTERP_32_MASK_SHIFT_2_4(p3)*2) / 16); +} + +static uint32 hq2x_interp_32_11(uint32 p1, uint32 p2) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2)) / 2) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2)) / 2); +} + +static uint32 hq2x_interp_32_31(uint32 p1, uint32 p2) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*3 + INTERP_32_MASK_1_3(p2)) / 4) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*3 + INTERP_32_MASK_SHIFT_2_4(p2)) / 4); +} + +static uint32 hq2x_interp_32_1411(uint32 p1, uint32 p2, uint32 p3) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*14 + INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 16) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*14 + INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 16); +} + +static uint32 
hq2x_interp_32_431(uint32 p1, uint32 p2, uint32 p3) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*4 + INTERP_32_MASK_1_3(p2)*3 + INTERP_32_MASK_1_3(p3)) / 8) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*4 + INTERP_32_MASK_SHIFT_2_4(p2)*3 + INTERP_32_MASK_SHIFT_2_4(p3)) / 8); +} + +static uint32 hq2x_interp_32_53(uint32 p1, uint32 p2) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*5 + INTERP_32_MASK_1_3(p2)*3) / 8) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*5 + INTERP_32_MASK_SHIFT_2_4(p2)*3) / 8); +} + +static uint32 hq2x_interp_32_151(uint32 p1, uint32 p2) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*15 + INTERP_32_MASK_1_3(p2)) / 16) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*15 + INTERP_32_MASK_SHIFT_2_4(p2)) / 16); +} + +static uint32 hq2x_interp_32_97(uint32 p1, uint32 p2) +{ + return INTERP_32_MASK_1_3((INTERP_32_MASK_1_3(p1)*9 + INTERP_32_MASK_1_3(p2)*7) / 16) + | INTERP_32_MASK_SHIFTBACK_2_4((INTERP_32_MASK_SHIFT_2_4(p1)*9 + INTERP_32_MASK_SHIFT_2_4(p2)*7) / 16); +} + +/***************************************************************************/ +/* diff */ + +#define INTERP_Y_LIMIT (0x30*4) +#define INTERP_U_LIMIT (0x07*4) +#define INTERP_V_LIMIT (0x06*8) + +#if !_16BPP_HACK +static int hq2x_interp_16_diff(uint16 p1, uint16 p2) +{ + int r, g, b; + int y, u, v; + + if (p1 == p2) + return 0; + + b = (int)((p1 & 0x000F) - (p2 & 0x000F)); + g = (int)((p1 & 0x00F0) - (p2 & 0x00F0)) >> 4; + r = (int)((p1 & 0x0F00) - (p2 & 0x0F00)) >> 8; + + y = r + g + b; + u = r - b; + v = -r + 2*g - b; + + if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT) + return 1; + + if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT) + return 1; + + if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT) + return 1; + + return 0; +} +#endif /* !_16BPP_HACK */ + +static int hq2x_interp_32_diff(uint32 p1, uint32 p2) +{ + int r, g, b; + int y, u, v; + + if ((p1 & 0xF8F8F8) == (p2 & 0xF8F8F8)) + return 0; + + b = (int)((p1 & 
0xFF) - (p2 & 0xFF)); + g = (int)((p1 & 0xFF00) - (p2 & 0xFF00)) >> 8; + r = (int)((p1 & 0xFF0000) - (p2 & 0xFF0000)) >> 16; + + y = r + g + b; + u = r - b; + v = -r + 2*g - b; + + if (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT) + return 1; + + if (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT) + return 1; + + if (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT) + return 1; + + return 0; +} + +/*static void interp_set(unsigned bits_per_pixel) +{ + interp_bits_per_pixel = bits_per_pixel; +}*/ + +#if !_16BPP_HACK +static void hq2x_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count) +{ + unsigned i; + + for(i=0;i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i> 3; + r = (int)((c[j] & 0xF800)) >> 8; + } else { + b = (int)((c[j] & 0x1F)) << 3; + g = (int)((c[j] & 0x3E0)) >> 2; + r = (int)((c[j] & 0x7C00)) >> 7; + } + const int bright = r+r+r + g+g+g + b+b; + if(bright > maxBright) maxBright = bright; + if(bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - minBright) * 7) >> 4; + if(diffBright > 7) { +#define ABS(x) ((x) < 0 ? 
-(x) : (x)) + + const int centerBright = brightArray[4]; + if(ABS(brightArray[0] - centerBright) > diffBright) + mask |= 1 << 0; + if(ABS(brightArray[1] - centerBright) > diffBright) + mask |= 1 << 1; + if(ABS(brightArray[2] - centerBright) > diffBright) + mask |= 1 << 2; + if(ABS(brightArray[3] - centerBright) > diffBright) + mask |= 1 << 3; + if(ABS(brightArray[5] - centerBright) > diffBright) + mask |= 1 << 4; + if(ABS(brightArray[6] - centerBright) > diffBright) + mask |= 1 << 5; + if(ABS(brightArray[7] - centerBright) > diffBright) + mask |= 1 << 6; + if(ABS(brightArray[8] - centerBright) > diffBright) + mask |= 1 << 7; + } + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define HQ2X_MUR false +#define HQ2X_MDR false +#define HQ2X_MDL false +#define HQ2X_MUL false +#define IC(p0) c[p0] +#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1]) +#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2]) +#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1]) +#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2]) +#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2]) +#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2]) +#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1]) +#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2]) +#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1]) +#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2]) +#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1]) +#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2]) +#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1]) + + switch (mask) { +#include "TextureFilters_hq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef HQ2X_MUR +#undef HQ2X_MDR +#undef HQ2X_MDL +#undef HQ2X_MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 
2; + dst1 += 2; + } +} +#endif /* !_16BPP_HACK */ + +static void hq2x_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count) +{ + unsigned i; + + for(i=0;i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = src0[0]; + c[3] = src1[0]; + c[6] = src2[0]; + } + + if (i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = src0[0]; + c[3] = src1[0]; + c[6] = src2[0]; + } + + if (i> 8; + const int r = (int)((c[j] & 0xF80000)) >> 16; + const int bright = r+r+r + g+g+g + b+b; + if(bright > maxBright) maxBright = bright; + if(bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - minBright) * 7) >> 4; + if(diffBright > 7) { +#define ABS(x) ((x) < 0 ? -(x) : (x)) + + const int centerBright = brightArray[4]; + if(ABS(brightArray[0] - centerBright) > diffBright) + mask |= 1 << 0; + if(ABS(brightArray[1] - centerBright) > diffBright) + mask |= 1 << 1; + if(ABS(brightArray[2] - centerBright) > diffBright) + mask |= 1 << 2; + if(ABS(brightArray[3] - centerBright) > diffBright) + mask |= 1 << 3; + if(ABS(brightArray[5] - centerBright) > diffBright) + mask |= 1 << 4; + if(ABS(brightArray[6] - centerBright) > diffBright) + mask |= 1 << 5; + if(ABS(brightArray[7] - centerBright) > diffBright) + mask |= 1 << 6; + if(ABS(brightArray[8] - centerBright) > diffBright) + mask |= 1 << 7; + } +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define HQ2X_MUR false +#define HQ2X_MDR false +#define HQ2X_MDL false +#define HQ2X_MUL false +#define IC(p0) c[p0] +#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1]) +#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2]) +#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1]) +#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2]) +#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2]) +#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], 
c[p1], c[p2]) +#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1]) +#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2]) +#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1]) +#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2]) +#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1]) +#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2]) +#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1]) + + switch (mask) { +#include "TextureFilters_hq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef HQ2X_MUR +#undef HQ2X_MDR +#undef HQ2X_MDL +#undef HQ2X_MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +/***************************************************************************/ +/* LQ2x C implementation */ + +/* +* This effect is derived from the hq2x effect made by Maxim Stepin +*/ + +#if !_16BPP_HACK +static void lq2x_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count) +{ + unsigned i; + + for(i=0;i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i> 8; + const int r = (int)((c[j] & 0xF80000)) >> 16; + const int bright = r+r+r + g+g+g + b+b; + if(bright > maxBright) maxBright = bright; + if(bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - minBright) * 7) >> 4; + if(diffBright > 7) { +#define ABS(x) ((x) < 0 ? 
-(x) : (x)) + + const int centerBright = brightArray[4]; + if(ABS(brightArray[0] - centerBright) > diffBright) + mask |= 1 << 0; + if(ABS(brightArray[1] - centerBright) > diffBright) + mask |= 1 << 1; + if(ABS(brightArray[2] - centerBright) > diffBright) + mask |= 1 << 2; + if(ABS(brightArray[3] - centerBright) > diffBright) + mask |= 1 << 3; + if(ABS(brightArray[5] - centerBright) > diffBright) + mask |= 1 << 4; + if(ABS(brightArray[6] - centerBright) > diffBright) + mask |= 1 << 5; + if(ABS(brightArray[7] - centerBright) > diffBright) + mask |= 1 << 6; + if(ABS(brightArray[8] - centerBright) > diffBright) + mask |= 1 << 7; + } + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define HQ2X_MUR false +#define HQ2X_MDR false +#define HQ2X_MDL false +#define HQ2X_MUL false +#define IC(p0) c[p0] +#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1]) +#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2]) +#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1]) +#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2]) +#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2]) +#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2]) +#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1]) +#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2]) +#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1]) +#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2]) +#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1]) +#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2]) +#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1]) + + switch (mask) { +#include "TextureFilters_lq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef HQ2X_MUR +#undef HQ2X_MDR +#undef HQ2X_MDL +#undef HQ2X_MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 
2; + dst1 += 2; + } +} +#endif /* !_16BPP_HACK */ + +static void lq2x_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count) +{ + unsigned i; + + for(i=0;i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i0) { + c[0] = src0[-1]; + c[3] = src1[-1]; + c[6] = src2[-1]; + } else { + c[0] = c[1]; + c[3] = c[4]; + c[6] = c[7]; + } + + if (i> 8; + const int r = (int)((c[j] & 0xF80000)) >> 16; + const int bright = r+r+r + g+g+g + b+b; + if(bright > maxBright) maxBright = bright; + if(bright < minBright) minBright = bright; + + brightArray[j] = bright; + } + int diffBright = ((maxBright - minBright) * 7) >> 4; + if(diffBright > 7) { +#define ABS(x) ((x) < 0 ? -(x) : (x)) + + const int centerBright = brightArray[4]; + if(ABS(brightArray[0] - centerBright) > diffBright) + mask |= 1 << 0; + if(ABS(brightArray[1] - centerBright) > diffBright) + mask |= 1 << 1; + if(ABS(brightArray[2] - centerBright) > diffBright) + mask |= 1 << 2; + if(ABS(brightArray[3] - centerBright) > diffBright) + mask |= 1 << 3; + if(ABS(brightArray[5] - centerBright) > diffBright) + mask |= 1 << 4; + if(ABS(brightArray[6] - centerBright) > diffBright) + mask |= 1 << 5; + if(ABS(brightArray[7] - centerBright) > diffBright) + mask |= 1 << 6; + if(ABS(brightArray[8] - centerBright) > diffBright) + mask |= 1 << 7; + } + +#define P0 dst0[0] +#define P1 dst0[1] +#define P2 dst1[0] +#define P3 dst1[1] +#define HQ2X_MUR false +#define HQ2X_MDR false +#define HQ2X_MDL false +#define HQ2X_MUL false +#define IC(p0) c[p0] +#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1]) +#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2]) +#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1]) +#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2]) +#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2]) +#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2]) 
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1]) +#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2]) +#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1]) +#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2]) +#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1]) +#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2]) +#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1]) + + switch (mask) { +#include "TextureFilters_lq2x.h" + } + +#undef P0 +#undef P1 +#undef P2 +#undef P3 +#undef HQ2X_MUR +#undef HQ2X_MDR +#undef HQ2X_MDL +#undef HQ2X_MUL +#undef IC +#undef I11 +#undef I211 +#undef I31 +#undef I332 +#undef I431 +#undef I521 +#undef I53 +#undef I611 +#undef I71 +#undef I772 +#undef I97 +#undef I1411 +#undef I151 + + src0 += 1; + src1 += 1; + src2 += 1; + dst0 += 2; + dst1 += 2; + } +} + +#if !_16BPP_HACK +void hq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint16 *dst0 = (uint16 *)dstPtr; + uint16 *dst1 = dst0 + (dstPitch >> 1); + + uint16 *src0 = (uint16 *)srcPtr; + uint16 *src1 = src0 + (srcPitch >> 1); + uint16 *src2 = src1 + (srcPitch >> 1); + + int count; + + hq2x_16_def(dst0, dst1, src0, src0, src1, width); + if( height == 1 ) return; + + count = height; + + count -= 2; + while(count>0) { + dst0 += dstPitch; + dst1 += dstPitch; + hq2x_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + hq2x_16_def(dst0, dst1, src0, src1, src1, width); +} + + +void hq2xS_16(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */, + u8 *dstPtr, u32 dstPitch, int width, int height) +{ + u16 *dst0 = (u16 *)dstPtr; + u16 *dst1 = dst0 + (dstPitch >> 1); + + u16 *src0 = (u16 *)srcPtr; + u16 *src1 = src0 + (srcPitch >> 1); + u16 *src2 = src1 + (srcPitch >> 1); + + hq2xS_16_def(dst0, dst1, src0, src0, src1, width); + + int count = height; + + count -= 2; + while(count) { + dst0 += dstPitch; + dst1 += dstPitch; 
+ hq2xS_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + hq2xS_16_def(dst0, dst1, src0, src1, src1, width); +} +#endif /* !_16BPP_HACK */ + +/* hq2x_32: run the hq2x row kernel over a 32-bit image, one source row per call. + * srcPitch and dstPitch are divided by four to step uint32 pointers; every + * source row fills two destination rows (dst0 and dst1). Where a neighbouring + * row would lie outside the image, the nearest row inside it is passed instead. */ +void hq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint32 *dst0 = (uint32 *)dstPtr; + uint32 *dst1 = dst0 + (dstPitch >> 2); + + uint32 *src0 = (uint32 *)srcPtr; + uint32 *src1 = src0 + (srcPitch >> 2); + uint32 *src2 = src1 + (srcPitch >> 2); + + int count; + + /* first row has no row above; a one-row image has no row below either, so src1 must not be read */ + hq2x_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); + if( height == 1 ) return; + + count = height; + + count -= 2; + while(count>0) { + /* dstPitch >> 1 uint32 elements = two destination rows */ + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2x_32_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + /* last row: the row below is replaced by the row itself */ + hq2x_32_def(dst0, dst1, src0, src1, src1, width); +} + +/* hq2xS_32: same row walk as hq2x_32, using the hq2xS row kernel. */ +void hq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint32 *dst0 = (uint32 *)dstPtr; + uint32 *dst1 = dst0 + (dstPitch >> 2); + + uint32 *src0 = (uint32 *)srcPtr; + uint32 *src1 = src0 + (srcPitch >> 2); + uint32 *src2 = src1 + (srcPitch >> 2); + hq2xS_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); + /* a one-row image is complete here; continuing would write a second row pair past the destination */ + if( height == 1 ) return; + + int count = height; + + count -= 2; + /* test count>0, not count!=0, so a non-positive count can never start the loop */ + while(count>0) { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2xS_32_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + hq2xS_32_def(dst0, dst1, src0, src1, src1, width); +} + +#if !_16BPP_HACK +void lq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint16 *dst0 = (uint16 *)dstPtr; + uint16 *dst1 = dst0 + (dstPitch >> 1); + + uint16 *src0 = (uint16 *)srcPtr; + uint16 *src1 = src0 + (srcPitch >> 1); + uint16 *src2 = src1 + (srcPitch >> 1); + + int
count; + + /* first row has no row above; a one-row image has no row below either, so src1 must not be read */ + lq2x_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); + if( height == 1 ) return; + + count = height; + + count -= 2; + while(count>0) { + dst0 += dstPitch; + dst1 += dstPitch; + /* interior rows use the same lq2x kernel as the first and last row */ + lq2x_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + lq2x_16_def(dst0, dst1, src0, src1, src1, width); +} + +/* lq2xS_16: run the lq2xS row kernel over a 16-bit image, one source row per call. + * Pitches are halved to step uint16 pointers; every source row fills two + * destination rows. Rows outside the image are replaced by the nearest row inside. */ +void lq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint16 *dst0 = (uint16 *)dstPtr; + uint16 *dst1 = dst0 + (dstPitch >> 1); + + uint16 *src0 = (uint16 *)srcPtr; + uint16 *src1 = src0 + (srcPitch >> 1); + uint16 *src2 = src1 + (srcPitch >> 1); + + int count; + + lq2xS_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); + if( height == 1 ) return; + + count = height; + + count -= 2; + while(count>0) { + dst0 += dstPitch; + dst1 += dstPitch; + /* interior rows use the same lq2xS kernel as the first and last row */ + lq2xS_16_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 1; + --count; + } + dst0 += dstPitch; + dst1 += dstPitch; + lq2xS_16_def(dst0, dst1, src0, src1, src1, width); +} +#endif /* !_16BPP_HACK */ + +/* lq2x_32: run the lq2x row kernel over a 32-bit image, one source row per call. + * Pitches are divided by four to step uint32 pointers; every source row fills two + * destination rows. Rows outside the image are replaced by the nearest row inside. */ +void lq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint32 *dst0 = (uint32 *)dstPtr; + uint32 *dst1 = dst0 + (dstPitch >> 2); + + uint32 *src0 = (uint32 *)srcPtr; + uint32 *src1 = src0 + (srcPitch >> 2); + uint32 *src2 = src1 + (srcPitch >> 2); + + int count; + + lq2x_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); + if( height == 1 ) return; + + count = height; + + count -= 2; + while(count>0) { + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + /* interior rows use the same lq2x kernel as the first and last row */ + lq2x_32_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + lq2x_32_def(dst0, dst1, src0, src1, src1, width); +} + +void lq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) +{ + uint32 *dst0 =
(uint32 *)dstPtr; + uint32 *dst1 = dst0 + (dstPitch >> 2); + + uint32 *src0 = (uint32 *)srcPtr; + uint32 *src1 = src0 + (srcPitch >> 2); + uint32 *src2 = src1 + (srcPitch >> 2); + + int count; + + /* first row has no row above; a one-row image has no row below either, so src1 must not be read */ + lq2xS_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); + if( height == 1 ) return; + + count = height; + + count -= 2; + while(count>0) { + /* dstPitch >> 1 uint32 elements = two destination rows */ + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + /* interior rows use the same lq2xS kernel as the first and last row */ + lq2xS_32_def(dst0, dst1, src0, src1, src2, width); + src0 = src1; + src1 = src2; + src2 += srcPitch >> 2; + --count; + } + dst0 += dstPitch >> 1; + dst1 += dstPitch >> 1; + /* last row: the row below is replaced by the row itself */ + lq2xS_32_def(dst0, dst1, src0, src1, src1, width); +} + +/************************************************************************/ +/* hq3x filters */ +/************************************************************************/ + +/************************************************************************/ +/* scale2x filters */ +/************************************************************************/ + +/************************************************************************/ +/* scale3x filters */ +/************************************************************************/ + diff --git a/Source/GlideHQ/TextureFilters_hq2x.h b/Source/GlideHQ/TextureFilters_hq2x.h new file mode 100644 index 000000000..7946323b8 --- /dev/null +++ b/Source/GlideHQ/TextureFilters_hq2x.h @@ -0,0 +1,1847 @@ +/* +Copyright (C) 2003 Rice1964 + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +*/ + +/* Copyright (C) 2007 Hiroshi Morii + * Modified for the Texture Filtering library + */ + +case 0 : +case 1 : +case 4 : +case 5 : +case 32 : +case 33 : +case 36 : +case 37 : +case 128 : +case 129 : +case 132 : +case 133 : +case 160 : +case 161 : +case 164 : +case 165 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 2 : +case 34 : +case 130 : +case 162 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 3 : +case 35 : +case 131 : +case 163 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 6 : +case 38 : +case 134 : +case 166 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 7 : +case 39 : +case 135 : +case 167 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I211(4, 3, 7); + P3 = I211(4, 5, 7); +} break; +case 8 : +case 12 : +case 136 : +case 140 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); +} break; +case 9 : +case 13 : +case 137 : +case 141 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); +} break; +case 10 : +case 138 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 11 : +case 139 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 14 : +case 142 : +{ + P2 = I31(4, 6); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + P1 = I31(4, 5); + } else { + P0 = I332(1, 3, 4); + P1 = I521(4, 1, 5); + } +} break; +case 15 : +case 143 : +{ + P2 = I31(4, 6); + P3 = I211(4, 5, 7); 
+ if (HQ2X_MUL) { + P0 = IC(4); + P1 = I31(4, 5); + } else { + P0 = I332(1, 3, 4); + P1 = I521(4, 1, 5); + } +} break; +case 16 : +case 17 : +case 48 : +case 49 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); +} break; +case 18 : +case 50 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 19 : +case 51 : +{ + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P0 = I31(4, 3); + P1 = I31(4, 2); + } else { + P0 = I521(4, 1, 3); + P1 = I332(1, 5, 4); + } +} break; +case 20 : +case 21 : +case 52 : +case 53 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); +} break; +case 22 : +case 54 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 23 : +case 55 : +{ + P2 = I211(4, 3, 7); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P0 = I31(4, 3); + P1 = IC(4); + } else { + P0 = I521(4, 1, 3); + P1 = I332(1, 5, 4); + } +} break; +case 24 : +case 66 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 25 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 26 : +case 31 : +case 95 : +{ + P2 = I31(4, 6); + P3 = I31(4, 8); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 27 : +case 75 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 28 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 29 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 30 : +case 86 : +{ + P0 = I31(4, 0); + P2 = I31(4, 6); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = 
I211(4, 1, 5); + } +} break; +case 40 : +case 44 : +case 168 : +case 172 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); +} break; +case 41 : +case 45 : +case 169 : +case 173 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); +} break; +case 42 : +case 170 : +{ + P1 = I31(4, 2); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + P2 = I31(4, 7); + } else { + P0 = I332(1, 3, 4); + P2 = I521(4, 3, 7); + } +} break; +case 43 : +case 171 : +{ + P1 = I31(4, 2); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = IC(4); + P2 = I31(4, 7); + } else { + P0 = I332(1, 3, 4); + P2 = I521(4, 3, 7); + } +} break; +case 46 : +case 174 : +{ + P1 = I31(4, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 47 : +case 175 : +{ + P1 = I31(4, 5); + P2 = I31(4, 7); + P3 = I211(4, 5, 7); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 56 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 57 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 58 : +{ + P2 = I31(4, 7); + P3 = I31(4, 8); + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 59 : +{ + P2 = I31(4, 7); + P3 = I31(4, 8); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 60 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 61 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 8); +} break; +case 62 : +{ + P0 = I31(4, 0); + P2 = I31(4, 7); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 63 : +{ + P2 = I31(4, 7); + P3 = I31(4, 
8); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 64 : +case 65 : +case 68 : +case 69 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 67 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 70 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 71 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 8); +} break; +case 72 : +case 76 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 73 : +case 77 : +{ + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P0 = I31(4, 1); + P2 = I31(4, 6); + } else { + P0 = I521(4, 3, 1); + P2 = I332(3, 7, 4); + } +} break; +case 74 : +case 107 : +case 123 : +{ + P1 = I31(4, 2); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 78 : +{ + P1 = I31(4, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 79 : +{ + P1 = I31(4, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 80 : +case 81 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 82 : +case 214 : +case 222 : +{ + P0 = I31(4, 0); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 83 : +{ + P0 = I31(4, 3); + P2 = I31(4, 6); 
+ if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 84 : +case 85 : +{ + P0 = I211(4, 1, 3); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P1 = I31(4, 1); + P3 = I31(4, 8); + } else { + P1 = I521(4, 5, 1); + P3 = I332(5, 7, 4); + } +} break; +case 87 : +{ + P0 = I31(4, 3); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 88 : +case 248 : +case 250 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 89 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 90 : +{ + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 91 : +{ + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 92 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 93 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); 
+ } +} break; +case 94 : +{ + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 96 : +case 97 : +case 100 : +case 101 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 98 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 99 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 102 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 103 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 8); +} break; +case 104 : +case 108 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 105 : +case 109 : +{ + P1 = I211(4, 1, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P0 = I31(4, 1); + P2 = IC(4); + } else { + P0 = I521(4, 3, 1); + P2 = I332(3, 7, 4); + } +} break; +case 106 : +case 120 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 110 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 111 : +{ + P1 = I31(4, 5); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 112 : +case 113 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + if (HQ2X_MDR) { + P2 = I31(4, 3); + P3 = I31(4, 8); + } else { + P2 = I521(4, 7, 3); + P3 = I332(5, 7, 4); + } +} break; +case 114 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + 
P3 = I611(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 115 : +{ + P0 = I31(4, 3); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 116 : +case 117 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 118 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 119 : +{ + P2 = I31(4, 3); + P3 = I31(4, 8); + if (HQ2X_MUR) { + P0 = I31(4, 3); + P1 = IC(4); + } else { + P0 = I521(4, 1, 3); + P1 = I332(1, 5, 4); + } +} break; +case 121 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } +} break; +case 122 : +{ + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = I31(4, 8); + } else { + P3 = I611(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 124 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } +} break; +case 125 : +{ + P1 = I31(4, 1); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P0 = I31(4, 1); + P2 = IC(4); + } else { + P0 = I521(4, 3, 1); + P2 = I332(3, 7, 4); + } +} break; +case 126 : +{ + P0 = I31(4, 0); + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 127 : +{ + P3 = I31(4, 8); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 
3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 144 : +case 145 : +case 176 : +case 177 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); +} break; +case 146 : +case 178 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + if (HQ2X_MUR) { + P1 = I31(4, 2); + P3 = I31(4, 7); + } else { + P1 = I332(1, 5, 4); + P3 = I521(4, 5, 7); + } +} break; +case 147 : +case 179 : +{ + P0 = I31(4, 3); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 148 : +case 149 : +case 180 : +case 181 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); +} break; +case 150 : +case 182 : +{ + P0 = I31(4, 0); + P2 = I211(4, 3, 7); + if (HQ2X_MUR) { + P1 = IC(4); + P3 = I31(4, 7); + } else { + P1 = I332(1, 5, 4); + P3 = I521(4, 5, 7); + } +} break; +case 151 : +case 183 : +{ + P0 = I31(4, 3); + P2 = I211(4, 3, 7); + P3 = I31(4, 7); + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 152 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 153 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 154 : +{ + P2 = I31(4, 6); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 155 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 156 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 157 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 6); + P3 = I31(4, 7); +} break; +case 158 : +{ + P2 = I31(4, 6); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = 
I211(4, 1, 5); + } +} break; +case 159 : +{ + P2 = I31(4, 6); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 184 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 185 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 186 : +{ + P2 = I31(4, 7); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 187 : +{ + P1 = I31(4, 2); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = IC(4); + P2 = I31(4, 7); + } else { + P0 = I332(1, 3, 4); + P2 = I521(4, 3, 7); + } +} break; +case 188 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 189 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + P2 = I31(4, 7); + P3 = I31(4, 7); +} break; +case 190 : +{ + P0 = I31(4, 0); + P2 = I31(4, 7); + if (HQ2X_MUR) { + P1 = IC(4); + P3 = I31(4, 7); + } else { + P1 = I332(1, 5, 4); + P3 = I521(4, 5, 7); + } +} break; +case 191 : +{ + P2 = I31(4, 7); + P3 = I31(4, 7); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 192 : +case 193 : +case 196 : +case 197 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 194 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 195 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 198 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 199 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 6); + P3 = I31(4, 5); +} break; +case 200 : +case 204 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + if (HQ2X_MDL) { + P2 = I31(4, 6); + 
P3 = I31(4, 5); + } else { + P2 = I332(3, 7, 4); + P3 = I521(4, 7, 5); + } +} break; +case 201 : +case 205 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } +} break; +case 202 : +{ + P1 = I31(4, 2); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 203 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + P3 = I31(4, 5); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 206 : +{ + P1 = I31(4, 5); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 207 : +{ + P2 = I31(4, 6); + P3 = I31(4, 5); + if (HQ2X_MUL) { + P0 = IC(4); + P1 = I31(4, 5); + } else { + P0 = I332(1, 3, 4); + P1 = I521(4, 1, 5); + } +} break; +case 208 : +case 209 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 210 : +case 216 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 211 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 212 : +case 213 : +{ + P0 = I211(4, 1, 3); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P1 = I31(4, 1); + P3 = IC(4); + } else { + P1 = I521(4, 5, 1); + P3 = I332(5, 7, 4); + } +} break; +case 215 : +{ + P0 = I31(4, 3); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 217 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 
218 : +{ + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 219 : +{ + P1 = I31(4, 2); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 220 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + if (HQ2X_MDL) { + P2 = I31(4, 6); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 221 : +{ + P0 = I31(4, 1); + P2 = I31(4, 6); + if (HQ2X_MDR) { + P1 = I31(4, 1); + P3 = IC(4); + } else { + P1 = I521(4, 5, 1); + P3 = I332(5, 7, 4); + } +} break; +case 223 : +{ + P2 = I31(4, 6); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 224 : +case 225 : +case 228 : +case 229 : +{ + P0 = I211(4, 1, 3); + P1 = I211(4, 1, 5); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 226 : +{ + P0 = I31(4, 0); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 227 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 230 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 231 : +{ + P0 = I31(4, 3); + P1 = I31(4, 5); + P2 = I31(4, 3); + P3 = I31(4, 5); +} break; +case 232 : +case 236 : +{ + P0 = I31(4, 0); + P1 = I211(4, 1, 5); + if (HQ2X_MDL) { + P2 = IC(4); + P3 = I31(4, 5); + } else { + P2 = I332(3, 7, 4); + P3 = I521(4, 7, 5); + } +} break; +case 233 : +case 237 : +{ + P0 = I31(4, 1); + P1 = I211(4, 1, 5); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } 
+} break; +case 234 : +{ + P1 = I31(4, 2); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = I31(4, 0); + } else { + P0 = I611(4, 1, 3); + } +} break; +case 235 : +{ + P1 = I31(4, 2); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 238 : +{ + P0 = I31(4, 0); + P1 = I31(4, 5); + if (HQ2X_MDL) { + P2 = IC(4); + P3 = I31(4, 5); + } else { + P2 = I332(3, 7, 4); + P3 = I521(4, 7, 5); + } +} break; +case 239 : +{ + P1 = I31(4, 5); + P3 = I31(4, 5); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 240 : +case 241 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 2); + if (HQ2X_MDR) { + P2 = I31(4, 3); + P3 = IC(4); + } else { + P2 = I521(4, 7, 3); + P3 = I332(5, 7, 4); + } +} break; +case 242 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = I31(4, 2); + } else { + P1 = I611(4, 1, 5); + } +} break; +case 243 : +{ + P0 = I31(4, 3); + P1 = I31(4, 2); + if (HQ2X_MDR) { + P2 = I31(4, 3); + P3 = IC(4); + } else { + P2 = I521(4, 7, 3); + P3 = I332(5, 7, 4); + } +} break; +case 244 : +case 245 : +{ + P0 = I211(4, 1, 3); + P1 = I31(4, 1); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } +} break; +case 246 : +{ + P0 = I31(4, 0); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 247 : +{ + P0 = I31(4, 3); + P2 = I31(4, 3); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 249 : +{ + P0 = I31(4, 1); + P1 = I31(4, 2); + if (HQ2X_MDL) { + P2 = IC(4); + } 
else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } +} break; +case 251 : +{ + P1 = I31(4, 2); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 252 : +{ + P0 = I31(4, 0); + P1 = I31(4, 1); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } +} break; +case 253 : +{ + P0 = I31(4, 1); + P1 = I31(4, 1); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } +} break; +case 254 : +{ + P0 = I31(4, 0); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 255 : +{ + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; diff --git a/Source/GlideHQ/TextureFilters_hq4x.cpp b/Source/GlideHQ/TextureFilters_hq4x.cpp new file mode 100644 index 000000000..89c14ea21 --- /dev/null +++ b/Source/GlideHQ/TextureFilters_hq4x.cpp @@ -0,0 +1,892 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Based on Maxim Stepin and Rice1964 hq4x code */ +/* NOTE(review): the two bare #include lines below have lost their header names in this copy of the patch; restore them from the upstream file. */ +#include +#include +#include "TextureFilters.h" +/* RGB444toYUV: 4096-entry table read through the macro of the same name, which indexes it with the low 12 bits of val. The code that fills the table is not in this part of the file. */ +#if !_16BPP_HACK +static uint32 RGB444toYUV[4096]; +#define RGB444toYUV(val) RGB444toYUV[val & 0x0FFF] /* val = ARGB4444 */ + +/*inline static uint32 RGB444toYUV(uint32 val) +{ + uint32 r, g, b, Y, u, v; + + r = (val & 0x0F00) >> 4; + g = (val & 0x00F0); + b = val & 0x000F; + r |= r >> 4; + g |= g >> 4; + b |= b << 4; + + Y = (r + g + b) >> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((2*g - r - b)>>3); + + return ((Y << 16) | (u << 8) | v); +}*/ +/* RGB555toYUV: take the three 5-bit fields selected by masks 0x7C00, 0x03E0 and 0x001F, + * widen each to 8 bits (top bits replicated into the low bits) and return + * (Y << 16) | (u << 8) | v with Y = (r+g+b)>>2, u = 128 + ((r-b)>>2), v = 128 + ((2g-r-b)>>3). + * NOTE(review): r, g and b are uint32, so r-b and 2*g-r-b wrap when negative. The low + * eight bits of u and v still come out as intended, but v can then carry set bits above + * bit 7 into the result; presumably callers always mask the fields before use - confirm. */ +static uint32 RGB555toYUV(uint32 val) +{ + uint32 r, g, b, Y, u, v; + + r = (val & 0x7C00) >> 7; + g = (val & 0x03E0) >> 2; + b = (val & 0x001F) << 3; + r |= r >> 5; + g |= g >> 5; + b |= b >> 5; + + Y = (r + g + b) >> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((2*g - r - b)>>3); + + return ((Y << 16) | (u << 8) | v); +} +/* RGB565toYUV: same packing as RGB555toYUV for the fields selected by masks 0xF800 (5 bits), + * 0x07E0 (6 bits) and 0x001F (5 bits); the 6-bit field is widened with g |= g >> 6. + * NOTE(review): the same unsigned wrap-around remark applies to u and v here. */ +static uint32 RGB565toYUV(uint32 val) +{ + uint32 r, g, b, Y, u, v; + + r = (val & 0xF800) >> 8; + g = (val & 0x07E0) >> 3; + b = (val & 0x001F) << 3; + r |= r >> 5; + g |= g >> 6; + b |= b >> 5; + + Y = (r + g + b) >> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((2*g - r - b)>>3); + + return ((Y << 16) | (u << 8) | v); +} +#endif /* !_16BPP_HACK */ +/* RGB888toYUV: packed Y/u/v value for a 32-bit pixel. The #if 0 branch is disabled + * inline assembly; it masks out the same three byte fields (0x00ff0000, 0x0000ff00, 0x000000ff). */ +static uint32 RGB888toYUV(uint32 val) +{ +#if 0 + uint32 Yuv; + + __asm { + mov eax, dword ptr [val]; + mov ebx, eax; + mov ecx, eax; + and ebx, 0x000000ff; // b + and eax, 0x00ff0000; // r + and ecx, 0x0000ff00; // g + shl ebx, 14; + shr eax, 2; + shl ecx, 6; + mov edx, ebx; + add edx, eax; +
add edx, ecx; + and edx, 0xffff0000; + + sub eax, ebx; + add eax, 0x00800000; + shr eax, 8; + or edx, eax; + sub eax, 0x00800000; + and edx, 0xffffff00; + + add ecx, 0x00800000; + shr ecx, 5; + shr ebx, 7; + add eax, ebx; + sub ecx, eax; + shr ecx, 11; + or edx, ecx; + + mov dword ptr [Yuv], edx; + } + + return Yuv; +#else + uint32 r, g, b, Y, u, v; + /* split the pixel into its three 8-bit fields: bits 16-23, 8-15 and 0-7 (bits 24-31 are ignored) */ + r = (val & 0x00ff0000) >> 16; + g = (val & 0x0000ff00) >> 8; + b = val & 0x000000ff; + /* the 0x200 / 0x400 bias is added before subtracting, so the unsigned intermediates never wrap; after the shift this equals 128 + (r-b)/4 and 128 + (2g-r-b)/8, rounded down */ + Y = (r + g + b) >> 2; + u = (0x00000200 + r - b) >> 2; + v = (0x00000400 + (g << 1) - r - b) >> 3; + + return ((Y << 16) | (u << 8) | v); +#endif +} +/* Field masks for the packed value built above as (Y << 16) | (u << 8) | v. + * trY, trU and trV each lie inside the matching field; presumably they are per-field + * difference thresholds, but the code that uses them is not visible here - confirm. */ +#define Ymask 0x00FF0000 +#define Umask 0x0000FF00 +#define Vmask 0x000000FF +#define trY 0x00300000 // ? +#define trU 0x00000700 // ?? +#define trV 0x00000006 // ??? +/* HQ4X_INTERPk(n, b) defines hq4x_Interpk_<n>(), which stores a weighted average of its + * pixel arguments at pc as a uint<b>. The pixel bits are handled as two groups chosen by + * INTERP_<n>_MASK_1_3 and INTERP_<n>_MASK_SHIFT_2_4; each group is averaged with integer + * arithmetic, masked again, and the two results are OR-ed together. + * Interp1: (3*p1 + p2) / 4. */ +#define HQ4X_INTERP1(n, b) \ +static void hq4x_Interp1_##n (uint8 * pc, uint##b p1, uint##b p2) \ +{ \ + /* *((uint##b*)pc) = (p1*3+p2) >> 2; */ \ + *((uint##b*)pc) = INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*3 + INTERP_##n##_MASK_1_3(p2)) / 4) \ + | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*3 + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 4 ); \ +} +/* Interp2: (2*p1 + p2 + p3) / 4. */ +#define HQ4X_INTERP2(n, b) \ +static void hq4x_Interp2_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \ +{ \ + /**((uint##b*)pc) = (p1*2+p2+p3) >> 2;*/ \ + *((uint##b*)pc) = INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*2 + INTERP_##n##_MASK_1_3(p2) + INTERP_##n##_MASK_1_3(p3)) / 4) \ + | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*2 + INTERP_##n##_MASK_SHIFT_2_4(p2) + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 4); \ +} +/* Interp3: (7*p1 + p2) / 8. */ +#define HQ4X_INTERP3(n, b) \ +static void hq4x_Interp3_##n (uint8 * pc, uint##b p1, uint##b p2) \ +{ \ + /**((uint##b*)pc) = (p1*7+p2)/8;*/ \ + *((uint##b*)pc) = INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*7 + INTERP_##n##_MASK_1_3(p2)) / 8) \ + | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*7 + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 8); \ +} +/* Interp5: (p1 + p2) / 2. */ +#define HQ4X_INTERP5(n, b) \ +static
/*
 * Lane helpers for blending ARGB4444 pixels two channels at a time.
 *
 * MASK_1_3        keeps nibbles 0 and 2 in place (each gets 4 spare bits of
 *                 headroom above it for the weighted sum).
 * MASK_SHIFT_2_4  extracts nibbles 1 and 3 and shifts them down into the
 *                 same positions as MASK_1_3.
 * MASK_SHIFTBACK_2_4 trims a blended MASK_SHIFT_2_4 result to 4 bits per lane
 *                 and moves it back up to nibbles 1 and 3.
 *
 * The argument is parenthesized so that an expression argument containing
 * an operator of lower precedence than '&' (e.g. a | b) is masked as a
 * whole instead of only its last operand.
 */
#define INTERP_4444_MASK_1_3(v) ((v) & 0x0F0F)
#define INTERP_4444_MASK_SHIFT_2_4(v) (((v) & 0xF0F0) >> 4)
#define INTERP_4444_MASK_SHIFTBACK_2_4(v) (INTERP_4444_MASK_1_3(v) << 4)
/*
 * Lane helpers for blending RGB565 pixels.
 *
 * MASK_1_3        keeps red (bits 11-15) and blue (bits 0-4) in place.
 * MASK_SHIFT_2_4  extracts the 6-bit green channel and shifts it down to
 *                 bits 0-5.
 * MASK_SHIFTBACK_2_4 trims a blended green value to 6 bits and moves it back
 *                 to bits 5-10.
 *
 * SHIFTBACK must use its own 6-bit mask: reusing MASK_1_3 (0xF81F) here keeps
 * only bits 0-4 of the green value and silently drops its most significant
 * bit, so e.g. white blended with white came out as 0xFBFF.
 *
 * Arguments are parenthesized so expression arguments are masked as a whole.
 */
#define INTERP_565_MASK_1_3(v) ((v) & 0xF81F)
#define INTERP_565_MASK_SHIFT_2_4(v) (((v) & 0x07E0) >> 5)
#define INTERP_565_MASK_SHIFTBACK_2_4(v) (((v) & 0x003F) << 5)
/*
 * Output-pixel macros for the 4x4 block produced from one source pixel.
 *
 * PIXELrc_nn writes output pixel (row r, column c) of the block at pOut.
 * Rows are BpL bytes apart, columns BPP bytes apart (BPP2 = 2*BPP,
 * BPP3 = 3*BPP).  The suffix selects the source: _0 copies c[5] unchanged,
 * every other suffix forwards to one of the hq4x_InterpN blend helpers with
 * the listed neighbours.  All macros expect pOut, BpL, c[], BPP/BPP2/BPP3 and
 * the hq4x_InterpN names to be provided by the including function, and they
 * carry their own trailing semicolon.
 *
 * HQ4X_PUT_C5 is the plain-copy store used by the _0 macros.  It writes
 * exactly one pixel of the current format (BPP bytes).  The previous form,
 * *((int*)(dst)) = c[5], always wrote sizeof(int) bytes; with BPP == 2 that
 * zeroed the pixel to the right (e.g. PIXEL12_0 executed after PIXEL13_0)
 * and ran past the end of the block for column 3.  With BPP == 4 the bytes
 * written are unchanged.
 *
 * NOTE: PIXEL00_0 and PIXEL01_0 are defined before this span and are not
 * covered here.
 */
#define HQ4X_PUT_C5(off) \
  do { \
    if (BPP == 2) *((uint16*)(pOut+(off))) = (uint16)c[5]; \
    else          *((uint32*)(pOut+(off))) = (uint32)c[5]; \
  } while (0)

#define PIXEL01_83 hq4x_Interp8(pOut+BPP, c[2], c[4]);
#define PIXEL02_0  HQ4X_PUT_C5(BPP2);
#define PIXEL02_10 hq4x_Interp1(pOut+BPP2, c[5], c[3]);
#define PIXEL02_11 hq4x_Interp1(pOut+BPP2, c[5], c[2]);
#define PIXEL02_13 hq4x_Interp1(pOut+BPP2, c[2], c[5]);
#define PIXEL02_21 hq4x_Interp2(pOut+BPP2, c[2], c[5], c[6]);
#define PIXEL02_32 hq4x_Interp3(pOut+BPP2, c[5], c[6]);
#define PIXEL02_50 hq4x_Interp5(pOut+BPP2, c[2], c[5]);
#define PIXEL02_60 hq4x_Interp6(pOut+BPP2, c[5], c[2], c[6]);
#define PIXEL02_61 hq4x_Interp6(pOut+BPP2, c[5], c[2], c[3]);
#define PIXEL02_81 hq4x_Interp8(pOut+BPP2, c[5], c[2]);
#define PIXEL02_83 hq4x_Interp8(pOut+BPP2, c[2], c[6]);
#define PIXEL03_0  HQ4X_PUT_C5(BPP3);
#define PIXEL03_11 hq4x_Interp1(pOut+BPP3, c[5], c[2]);
#define PIXEL03_12 hq4x_Interp1(pOut+BPP3, c[5], c[6]);
#define PIXEL03_20 hq4x_Interp2(pOut+BPP3, c[5], c[2], c[6]);
#define PIXEL03_50 hq4x_Interp5(pOut+BPP3, c[2], c[6]);
#define PIXEL03_80 hq4x_Interp8(pOut+BPP3, c[5], c[3]);
#define PIXEL03_81 hq4x_Interp8(pOut+BPP3, c[5], c[2]);
#define PIXEL03_82 hq4x_Interp8(pOut+BPP3, c[5], c[6]);
#define PIXEL10_0  HQ4X_PUT_C5(BpL);
#define PIXEL10_10 hq4x_Interp1(pOut+BpL, c[5], c[1]);
#define PIXEL10_11 hq4x_Interp1(pOut+BpL, c[5], c[4]);
#define PIXEL10_13 hq4x_Interp1(pOut+BpL, c[4], c[5]);
#define PIXEL10_21 hq4x_Interp2(pOut+BpL, c[4], c[5], c[2]);
#define PIXEL10_32 hq4x_Interp3(pOut+BpL, c[5], c[2]);
#define PIXEL10_50 hq4x_Interp5(pOut+BpL, c[4], c[5]);
#define PIXEL10_60 hq4x_Interp6(pOut+BpL, c[5], c[4], c[2]);
#define PIXEL10_61 hq4x_Interp6(pOut+BpL, c[5], c[4], c[1]);
#define PIXEL10_81 hq4x_Interp8(pOut+BpL, c[5], c[4]);
#define PIXEL10_83 hq4x_Interp8(pOut+BpL, c[4], c[2]);
#define PIXEL11_0  HQ4X_PUT_C5(BpL+BPP);
#define PIXEL11_30 hq4x_Interp3(pOut+BpL+BPP, c[5], c[1]);
#define PIXEL11_31 hq4x_Interp3(pOut+BpL+BPP, c[5], c[4]);
#define PIXEL11_32 hq4x_Interp3(pOut+BpL+BPP, c[5], c[2]);
#define PIXEL11_70 hq4x_Interp7(pOut+BpL+BPP, c[5], c[4], c[2]);
#define PIXEL12_0  HQ4X_PUT_C5(BpL+BPP2);
#define PIXEL12_30 hq4x_Interp3(pOut+BpL+BPP2, c[5], c[3]);
#define PIXEL12_31 hq4x_Interp3(pOut+BpL+BPP2, c[5], c[2]);
#define PIXEL12_32 hq4x_Interp3(pOut+BpL+BPP2, c[5], c[6]);
#define PIXEL12_70 hq4x_Interp7(pOut+BpL+BPP2, c[5], c[6], c[2]);
#define PIXEL13_0  HQ4X_PUT_C5(BpL+BPP3);
#define PIXEL13_10 hq4x_Interp1(pOut+BpL+BPP3, c[5], c[3]);
#define PIXEL13_12 hq4x_Interp1(pOut+BpL+BPP3, c[5], c[6]);
#define PIXEL13_14 hq4x_Interp1(pOut+BpL+BPP3, c[6], c[5]);
#define PIXEL13_21 hq4x_Interp2(pOut+BpL+BPP3, c[6], c[5], c[2]);
#define PIXEL13_31 hq4x_Interp3(pOut+BpL+BPP3, c[5], c[2]);
#define PIXEL13_50 hq4x_Interp5(pOut+BpL+BPP3, c[6], c[5]);
#define PIXEL13_60 hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[2]);
#define PIXEL13_61 hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[3]);
#define PIXEL13_82 hq4x_Interp8(pOut+BpL+BPP3, c[5], c[6]);
#define PIXEL13_83 hq4x_Interp8(pOut+BpL+BPP3, c[6], c[2]);
#define PIXEL20_0  HQ4X_PUT_C5(BpL+BpL);
#define PIXEL20_10 hq4x_Interp1(pOut+BpL+BpL, c[5], c[7]);
#define PIXEL20_12 hq4x_Interp1(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_14 hq4x_Interp1(pOut+BpL+BpL, c[4], c[5]);
#define PIXEL20_21 hq4x_Interp2(pOut+BpL+BpL, c[4], c[5], c[8]);
#define PIXEL20_31 hq4x_Interp3(pOut+BpL+BpL, c[5], c[8]);
#define PIXEL20_50 hq4x_Interp5(pOut+BpL+BpL, c[4], c[5]);
#define PIXEL20_60 hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[8]);
#define PIXEL20_61 hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[7]);
#define PIXEL20_82 hq4x_Interp8(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_83 hq4x_Interp8(pOut+BpL+BpL, c[4], c[8]);
#define PIXEL21_0  HQ4X_PUT_C5(BpL+BpL+BPP);
#define PIXEL21_30 hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[7]);
#define PIXEL21_31 hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[8]);
#define PIXEL21_32 hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[4]);
#define PIXEL21_70 hq4x_Interp7(pOut+BpL+BpL+BPP, c[5], c[4], c[8]);
#define PIXEL22_0  HQ4X_PUT_C5(BpL+BpL+BPP2);
#define PIXEL22_30 hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[9]);
#define PIXEL22_31 hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[6]);
#define PIXEL22_32 hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[8]);
#define PIXEL22_70 hq4x_Interp7(pOut+BpL+BpL+BPP2, c[5], c[6], c[8]);
#define PIXEL23_0  HQ4X_PUT_C5(BpL+BpL+BPP3);
#define PIXEL23_10 hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[9]);
#define PIXEL23_11 hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL23_13 hq4x_Interp1(pOut+BpL+BpL+BPP3, c[6], c[5]);
#define PIXEL23_21 hq4x_Interp2(pOut+BpL+BpL+BPP3, c[6], c[5], c[8]);
#define PIXEL23_32 hq4x_Interp3(pOut+BpL+BpL+BPP3, c[5], c[8]);
#define PIXEL23_50 hq4x_Interp5(pOut+BpL+BpL+BPP3, c[6], c[5]);
#define PIXEL23_60 hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[8]);
#define PIXEL23_61 hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[9]);
#define PIXEL23_81 hq4x_Interp8(pOut+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL23_83 hq4x_Interp8(pOut+BpL+BpL+BPP3, c[6], c[8]);
#define PIXEL30_0  HQ4X_PUT_C5(BpL+BpL+BpL);
#define PIXEL30_11 hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[8]);
#define PIXEL30_12 hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[4]);
#define PIXEL30_20 hq4x_Interp2(pOut+BpL+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL30_50 hq4x_Interp5(pOut+BpL+BpL+BpL, c[8], c[4]);
#define PIXEL30_80 hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[7]);
#define PIXEL30_81 hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[8]);
#define PIXEL30_82 hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[4]);
#define PIXEL31_0  HQ4X_PUT_C5(BpL+BpL+BpL+BPP);
#define PIXEL31_10 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[7]);
#define PIXEL31_11 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
#define PIXEL31_13 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
#define PIXEL31_21 hq4x_Interp2(pOut+BpL+BpL+BpL+BPP, c[8], c[5], c[4]);
#define PIXEL31_32 hq4x_Interp3(pOut+BpL+BpL+BpL+BPP, c[5], c[4]);
#define PIXEL31_50 hq4x_Interp5(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
#define PIXEL31_60 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[4]);
#define PIXEL31_61 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[7]);
#define PIXEL31_81 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
#define PIXEL31_83 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[8], c[4]);
#define PIXEL32_0  HQ4X_PUT_C5(BpL+BpL+BpL+BPP2);
#define PIXEL32_10 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[9]);
#define PIXEL32_12 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
#define PIXEL32_14 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
#define PIXEL32_21 hq4x_Interp2(pOut+BpL+BpL+BpL+BPP2, c[8], c[5], c[6]);
#define PIXEL32_31 hq4x_Interp3(pOut+BpL+BpL+BpL+BPP2, c[5], c[6]);
#define PIXEL32_50 hq4x_Interp5(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
#define PIXEL32_60 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[6]);
#define PIXEL32_61 hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[9]);
#define PIXEL32_82 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
#define PIXEL32_83 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[8], c[6]);
#define PIXEL33_0  HQ4X_PUT_C5(BpL+BpL+BpL+BPP3);
#define PIXEL33_11 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL33_12 hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
#define PIXEL33_20 hq4x_Interp2(pOut+BpL+BpL+BpL+BPP3, c[5], c[8], c[6]);
#define PIXEL33_50 hq4x_Interp5(pOut+BpL+BpL+BpL+BPP3, c[8], c[6]);
#define PIXEL33_80 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[9]);
#define PIXEL33_81 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
#define PIXEL33_82 hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
& Umask)) > trU ) || \ + ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) ); \ +} + +HQ4X_DIFF(888, 32) + +#if !_16BPP_HACK +HQ4X_DIFF(444, 16) +HQ4X_DIFF(555, 16) +HQ4X_DIFF(565, 16) + +void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL) +{ +#define hq4x_Interp1 hq4x_Interp1_4444 +#define hq4x_Interp2 hq4x_Interp2_4444 +#define hq4x_Interp3 hq4x_Interp3_4444 +#define hq4x_Interp4 hq4x_Interp4_4444 +#define hq4x_Interp5 hq4x_Interp5_4444 +#define hq4x_Interp6 hq4x_Interp6_4444 +#define hq4x_Interp7 hq4x_Interp7_4444 +#define hq4x_Interp8 hq4x_Interp8_4444 +#define Diff Diff_444 +#define BPP 2 +#define BPP2 4 +#define BPP3 6 + + int i, j, k; + int prevline, nextline; + uint16 w[10]; + uint16 c[10]; + + int pattern; + int flag; + + int YUV1, YUV2; + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + // | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) { + if (j>0) prevline = -SrcPPL*2; else prevline = 0; + if (j0) { + w[1] = *((uint16*)(pIn + prevline - 2)); + w[4] = *((uint16*)(pIn - 2)); + w[7] = *((uint16*)(pIn + nextline - 2)); + } else { + w[1] = w[2]; + w[4] = w[5]; + w[7] = w[8]; + } + + if (i trY ) || + ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) || + ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) ) + pattern |= flag; + } + flag <<= 1; + } + + for (k=1; k<=9; k++) + c[k] = w[k]; + +#include "TextureFilters_hq4x.h" + + pIn+=2; + pOut+=8; + } + pIn += 2*(SrcPPL-Xres); + pOut+= 8*(SrcPPL-Xres); + pOut+=BpL; + pOut+=BpL; + pOut+=BpL; + } + +#undef BPP +#undef BPP2 +#undef BPP3 +#undef Diff +#undef hq4x_Interp1 +#undef hq4x_Interp2 +#undef hq4x_Interp3 +#undef hq4x_Interp4 +#undef hq4x_Interp5 +#undef hq4x_Interp6 +#undef hq4x_Interp7 +#undef hq4x_Interp8 +} + +void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL) +{ +#define hq4x_Interp1 
hq4x_Interp1_1555 +#define hq4x_Interp2 hq4x_Interp2_1555 +#define hq4x_Interp3 hq4x_Interp3_1555 +#define hq4x_Interp4 hq4x_Interp4_1555 +#define hq4x_Interp5 hq4x_Interp5_1555 +#define hq4x_Interp6 hq4x_Interp6_1555 +#define hq4x_Interp7 hq4x_Interp7_1555 +#define hq4x_Interp8 hq4x_Interp8_1555 +#define Diff Diff_555 +#define BPP 2 +#define BPP2 4 +#define BPP3 6 + + int i, j, k; + int prevline, nextline; + uint16 w[10]; + uint16 c[10]; + + int pattern; + int flag; + + int YUV1, YUV2; + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + // | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) { + if (j>0) prevline = -SrcPPL*2; else prevline = 0; + if (j0) { + w[1] = *((uint16*)(pIn + prevline - 2)); + w[4] = *((uint16*)(pIn - 2)); + w[7] = *((uint16*)(pIn + nextline - 2)); + } else { + w[1] = w[2]; + w[4] = w[5]; + w[7] = w[8]; + } + + if (i trY ) || + ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) || + ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) ) + pattern |= flag; + } + flag <<= 1; + } + + for (k=1; k<=9; k++) + c[k] = w[k]; + +#include "TextureFilters_hq4x.h" + + pIn+=2; + pOut+=8; + } + pIn += 2*(SrcPPL-Xres); + pOut+= 8*(SrcPPL-Xres); + pOut+=BpL; + pOut+=BpL; + pOut+=BpL; + } + +#undef BPP +#undef BPP2 +#undef BPP3 +#undef Diff +#undef hq4x_Interp1 +#undef hq4x_Interp2 +#undef hq4x_Interp3 +#undef hq4x_Interp4 +#undef hq4x_Interp5 +#undef hq4x_Interp6 +#undef hq4x_Interp7 +#undef hq4x_Interp8 +} + +void hq4x_565(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL) +{ +#define hq4x_Interp1 hq4x_Interp1_565 +#define hq4x_Interp2 hq4x_Interp2_565 +#define hq4x_Interp3 hq4x_Interp3_565 +#define hq4x_Interp4 hq4x_Interp4_565 +#define hq4x_Interp5 hq4x_Interp5_565 +#define hq4x_Interp6 hq4x_Interp6_565 +#define hq4x_Interp7 hq4x_Interp7_565 +#define hq4x_Interp8 hq4x_Interp8_565 +#define Diff Diff_565 +#define BPP 
2 +#define BPP2 4 +#define BPP3 6 + + int i, j, k; + int prevline, nextline; + uint16 w[10]; + uint16 c[10]; + + int pattern; + int flag; + + int YUV1, YUV2; + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + // | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) { + if (j>0) prevline = -SrcPPL*2; else prevline = 0; + if (j0) { + w[1] = *((uint16*)(pIn + prevline - 2)); + w[4] = *((uint16*)(pIn - 2)); + w[7] = *((uint16*)(pIn + nextline - 2)); + } else { + w[1] = w[2]; + w[4] = w[5]; + w[7] = w[8]; + } + + if (i trY ) || + ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) || + ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) ) + pattern |= flag; + } + flag <<= 1; + } + + for (k=1; k<=9; k++) + c[k] = w[k]; + +#include "TextureFilters_hq4x.h" + + pIn+=2; + pOut+=8; + } + pIn += 2*(SrcPPL-Xres); + pOut+= 8*(SrcPPL-Xres); + pOut+=BpL; + pOut+=BpL; + pOut+=BpL; + } + +#undef BPP +#undef BPP2 +#undef BPP3 +#undef Diff +#undef hq4x_Interp1 +#undef hq4x_Interp2 +#undef hq4x_Interp3 +#undef hq4x_Interp4 +#undef hq4x_Interp5 +#undef hq4x_Interp6 +#undef hq4x_Interp7 +#undef hq4x_Interp8 +} +#endif /* !_16BPP_HACK */ + +void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL) +{ +#define hq4x_Interp1 hq4x_Interp1_8888 +#define hq4x_Interp2 hq4x_Interp2_8888 +#define hq4x_Interp3 hq4x_Interp3_8888 +#define hq4x_Interp4 hq4x_Interp4_8888 +#define hq4x_Interp5 hq4x_Interp5_8888 +#define hq4x_Interp6 hq4x_Interp6_8888 +#define hq4x_Interp7 hq4x_Interp7_8888 +#define hq4x_Interp8 hq4x_Interp8_8888 +#define Diff Diff_888 +#define BPP 4 +#define BPP2 8 +#define BPP3 12 + + int i, j, k; + int prevline, nextline; + uint32 w[10]; + uint32 c[10]; + + int pattern; + int flag; + + int YUV1, YUV2; + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + 
// | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + + for (j = 0; j < Yres; j++) { + if (j>0) prevline = -SrcPPL*4; else prevline = 0; + if (j0) { + w[1] = *((uint32*)(pIn + prevline - 4)); + w[4] = *((uint32*)(pIn - 4)); + w[7] = *((uint32*)(pIn + nextline - 4)); + } else { + w[1] = w[2]; + w[4] = w[5]; + w[7] = w[8]; + } + + if (i trY ) || + ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) || + ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) ) + pattern |= flag; + } + flag <<= 1; + } + + for (k=1; k<=9; k++) + c[k] = w[k]; + +#include "TextureFilters_hq4x.h" + + pIn+=4; + pOut+=16; + } + + pIn += 4*(SrcPPL-Xres); + pOut+= 16*(SrcPPL-Xres); + pOut+=BpL; + pOut+=BpL; + pOut+=BpL; + } + +#undef BPP +#undef BPP2 +#undef BPP3 +#undef Diff +#undef hq4x_Interp1 +#undef hq4x_Interp2 +#undef hq4x_Interp3 +#undef hq4x_Interp4 +#undef hq4x_Interp5 +#undef hq4x_Interp6 +#undef hq4x_Interp7 +#undef hq4x_Interp8 +} + +#if !_16BPP_HACK +void hq4x_init(void) +{ + static int done = 0; + int r, g, b, Y, u, v, i, j, k; + + if (done ) return; + + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) { + for (k = 0; k < 16; k++) { + r = (i << 4) | i; + g = (j << 4) | j; + b = (k << 4) | k; + + /* Microsoft's RGB888->YUV conversion */ + /*Y = ((( 66 * r + 129 * g + 25 * b + 128) >> 8) + 16) & 0xFF; + u = ((( -38 * r - 74 * g + 112 * b + 128) >> 8) + 128) & 0xFF; + v = ((( 112 * r - 94 * g - 18 * b + 128) >> 8) + 128) & 0xFF;*/ + + Y = (r + g + b) >> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((-r + 2*g -b)>>3); + + RGB444toYUV[(i << 8) | (j << 4) | k] = (Y << 16) | (u << 8) | v; + } + } + } + + done = 1; +} +#endif /* !_16BPP_HACK */ diff --git a/Source/GlideHQ/TextureFilters_hq4x.h b/Source/GlideHQ/TextureFilters_hq4x.h new file mode 100644 index 000000000..a3a27403c --- /dev/null +++ b/Source/GlideHQ/TextureFilters_hq4x.h @@ -0,0 +1,4999 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. 
+ * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Based on Maxim Stepin and Rice1964 hq4x code */ + + switch (pattern) + { + case 0: + case 1: + case 4: + case 32: + case 128: + case 5: + case 132: + case 160: + case 33: + case 129: + case 36: + case 133: + case 164: + case 161: + case 37: + case 165: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_60 + PIXEL03_20 + PIXEL10_60 + PIXEL11_70 + PIXEL12_70 + PIXEL13_60 + PIXEL20_60 + PIXEL21_70 + PIXEL22_70 + PIXEL23_60 + PIXEL30_20 + PIXEL31_60 + PIXEL32_60 + PIXEL33_20 + break; + } + case 2: + case 34: + case 130: + case 162: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_61 + PIXEL11_30 + PIXEL12_30 + PIXEL13_61 + PIXEL20_60 + PIXEL21_70 + PIXEL22_70 + PIXEL23_60 + PIXEL30_20 + PIXEL31_60 + PIXEL32_60 + PIXEL33_20 + break; + } + case 16: + case 17: + case 48: + case 49: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_61 + PIXEL03_80 + PIXEL10_60 + PIXEL11_70 + PIXEL12_30 + PIXEL13_10 + PIXEL20_60 + PIXEL21_70 + PIXEL22_30 + PIXEL23_10 + PIXEL30_20 + PIXEL31_60 + PIXEL32_61 + PIXEL33_80 + break; + } + case 64: + case 65: + case 68: + case 69: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_60 + PIXEL03_20 + PIXEL10_60 + PIXEL11_70 + PIXEL12_70 + PIXEL13_60 + PIXEL20_61 + PIXEL21_30 + PIXEL22_30 
+ PIXEL23_61 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 8: + case 12: + case 136: + case 140: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_60 + PIXEL03_20 + PIXEL10_10 + PIXEL11_30 + PIXEL12_70 + PIXEL13_60 + PIXEL20_10 + PIXEL21_30 + PIXEL22_70 + PIXEL23_60 + PIXEL30_80 + PIXEL31_61 + PIXEL32_60 + PIXEL33_20 + break; + } + case 3: + case 35: + case 131: + case 163: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_61 + PIXEL20_60 + PIXEL21_70 + PIXEL22_70 + PIXEL23_60 + PIXEL30_20 + PIXEL31_60 + PIXEL32_60 + PIXEL33_20 + break; + } + case 6: + case 38: + case 134: + case 166: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_61 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + PIXEL20_60 + PIXEL21_70 + PIXEL22_70 + PIXEL23_60 + PIXEL30_20 + PIXEL31_60 + PIXEL32_60 + PIXEL33_20 + break; + } + case 20: + case 21: + case 52: + case 53: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + PIXEL03_81 + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL13_31 + PIXEL20_60 + PIXEL21_70 + PIXEL22_30 + PIXEL23_10 + PIXEL30_20 + PIXEL31_60 + PIXEL32_61 + PIXEL33_80 + break; + } + case 144: + case 145: + case 176: + case 177: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_61 + PIXEL03_80 + PIXEL10_60 + PIXEL11_70 + PIXEL12_30 + PIXEL13_10 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL23_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + PIXEL33_82 + break; + } + case 192: + case 193: + case 196: + case 197: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_60 + PIXEL03_20 + PIXEL10_60 + PIXEL11_70 + PIXEL12_70 + PIXEL13_60 + PIXEL20_61 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 96: + case 97: + case 100: + case 101: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_60 + PIXEL03_20 + PIXEL10_60 + PIXEL11_70 + PIXEL12_70 + PIXEL13_60 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_61 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 40: + case 44: + 
case 168: + case 172: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_60 + PIXEL03_20 + PIXEL10_10 + PIXEL11_30 + PIXEL12_70 + PIXEL13_60 + PIXEL20_31 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL30_81 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 9: + case 13: + case 137: + case 141: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL10_32 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + PIXEL20_10 + PIXEL21_30 + PIXEL22_70 + PIXEL23_60 + PIXEL30_80 + PIXEL31_61 + PIXEL32_60 + PIXEL33_20 + break; + } + case 18: + case 50: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL12_0 + PIXEL13_50 + } + PIXEL10_61 + PIXEL11_30 + PIXEL20_60 + PIXEL21_70 + PIXEL22_30 + PIXEL23_10 + PIXEL30_20 + PIXEL31_60 + PIXEL32_61 + PIXEL33_80 + break; + } + case 80: + case 81: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_61 + PIXEL03_80 + PIXEL10_60 + PIXEL11_70 + PIXEL12_30 + PIXEL13_10 + PIXEL20_61 + PIXEL21_30 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 72: + case 76: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_60 + PIXEL03_20 + PIXEL10_10 + PIXEL11_30 + PIXEL12_70 + PIXEL13_60 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_50 + PIXEL21_0 + PIXEL30_50 + PIXEL31_50 + } + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 10: + case 138: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + PIXEL11_0 + } + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_61 + PIXEL20_10 + PIXEL21_30 + PIXEL22_70 + PIXEL23_60 + PIXEL30_80 + PIXEL31_61 + PIXEL32_60 + PIXEL33_20 + break; + } + case 66: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_61 + PIXEL11_30 + 
PIXEL12_30 + PIXEL13_61 + PIXEL20_61 + PIXEL21_30 + PIXEL22_30 + PIXEL23_61 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 24: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_61 + PIXEL32_61 + PIXEL33_80 + break; + } + case 7: + case 39: + case 135: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_32 + PIXEL03_82 + PIXEL10_81 + PIXEL11_31 + PIXEL12_32 + PIXEL13_82 + PIXEL20_60 + PIXEL21_70 + PIXEL22_70 + PIXEL23_60 + PIXEL30_20 + PIXEL31_60 + PIXEL32_60 + PIXEL33_20 + break; + } + case 148: + case 149: + case 180: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + PIXEL03_81 + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL13_31 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL23_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + PIXEL33_82 + break; + } + case 224: + case 228: + case 225: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_60 + PIXEL03_20 + PIXEL10_60 + PIXEL11_70 + PIXEL12_70 + PIXEL13_60 + PIXEL20_82 + PIXEL21_32 + PIXEL22_31 + PIXEL23_81 + PIXEL30_82 + PIXEL31_32 + PIXEL32_31 + PIXEL33_81 + break; + } + case 41: + case 169: + case 45: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL10_32 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + PIXEL20_31 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL30_81 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 22: + case 54: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_61 + PIXEL11_30 + PIXEL12_0 + PIXEL20_60 + PIXEL21_70 + PIXEL22_30 + PIXEL23_10 + PIXEL30_20 + PIXEL31_60 + PIXEL32_61 + PIXEL33_80 + break; + } + case 208: + case 209: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_61 + PIXEL03_80 + PIXEL10_60 + PIXEL11_70 + PIXEL12_30 + PIXEL13_10 + PIXEL20_61 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + 
PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 104: + case 108: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_60 + PIXEL03_20 + PIXEL10_10 + PIXEL11_30 + PIXEL12_70 + PIXEL13_60 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 11: + case 139: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_61 + PIXEL20_10 + PIXEL21_30 + PIXEL22_70 + PIXEL23_60 + PIXEL30_80 + PIXEL31_61 + PIXEL32_60 + PIXEL33_20 + break; + } + case 19: + case 51: + { + if (Diff(w[2], w[6])) + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL00_12 + PIXEL01_14 + PIXEL02_83 + PIXEL03_50 + PIXEL12_70 + PIXEL13_21 + } + PIXEL10_81 + PIXEL11_31 + PIXEL20_60 + PIXEL21_70 + PIXEL22_30 + PIXEL23_10 + PIXEL30_20 + PIXEL31_60 + PIXEL32_61 + PIXEL33_80 + break; + } + case 146: + case 178: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + PIXEL23_32 + PIXEL33_82 + } + else + { + PIXEL02_21 + PIXEL03_50 + PIXEL12_70 + PIXEL13_83 + PIXEL23_13 + PIXEL33_11 + } + PIXEL10_61 + PIXEL11_30 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + break; + } + case 84: + case 85: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + if (Diff(w[6], w[8])) + { + PIXEL03_81 + PIXEL13_31 + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL03_12 + PIXEL13_14 + PIXEL22_70 + PIXEL23_83 + PIXEL32_21 + PIXEL33_50 + } + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL20_61 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + break; + } + case 112: + case 113: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_61 + PIXEL03_80 + PIXEL10_60 + PIXEL11_70 + PIXEL12_30 + PIXEL13_10 + 
PIXEL20_82 + PIXEL21_32 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_70 + PIXEL23_21 + PIXEL30_11 + PIXEL31_13 + PIXEL32_83 + PIXEL33_50 + } + break; + } + case 200: + case 204: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_60 + PIXEL03_20 + PIXEL10_10 + PIXEL11_30 + PIXEL12_70 + PIXEL13_60 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + } + else + { + PIXEL20_21 + PIXEL21_70 + PIXEL30_50 + PIXEL31_83 + PIXEL32_14 + PIXEL33_12 + } + PIXEL22_31 + PIXEL23_81 + break; + } + case 73: + case 77: + { + if (Diff(w[8], w[4])) + { + PIXEL00_82 + PIXEL10_32 + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL00_11 + PIXEL10_13 + PIXEL20_83 + PIXEL21_70 + PIXEL30_50 + PIXEL31_21 + } + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 42: + case 170: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + PIXEL20_31 + PIXEL30_81 + } + else + { + PIXEL00_50 + PIXEL01_21 + PIXEL10_83 + PIXEL11_70 + PIXEL20_14 + PIXEL30_12 + } + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_61 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 14: + case 142: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_50 + PIXEL01_83 + PIXEL02_13 + PIXEL03_11 + PIXEL10_21 + PIXEL11_70 + } + PIXEL12_32 + PIXEL13_82 + PIXEL20_10 + PIXEL21_30 + PIXEL22_70 + PIXEL23_60 + PIXEL30_80 + PIXEL31_61 + PIXEL32_60 + PIXEL33_20 + break; + } + case 67: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_61 + PIXEL20_61 + PIXEL21_30 + PIXEL22_30 + PIXEL23_61 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 70: + { + PIXEL00_80 + 
PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_61 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + PIXEL20_61 + PIXEL21_30 + PIXEL22_30 + PIXEL23_61 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 28: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_61 + PIXEL32_61 + PIXEL33_80 + break; + } + case 152: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 194: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_61 + PIXEL11_30 + PIXEL12_30 + PIXEL13_61 + PIXEL20_61 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 98: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_61 + PIXEL11_30 + PIXEL12_30 + PIXEL13_61 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_61 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 56: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 25: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_61 + PIXEL32_61 + PIXEL33_80 + break; + } + case 26: + case 31: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL11_0 + PIXEL12_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_61 + 
PIXEL32_61 + PIXEL33_80 + break; + } + case 82: + case 214: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_61 + PIXEL11_30 + PIXEL12_0 + PIXEL20_61 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 88: + case 248: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + break; + } + case 74: + case 107: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_61 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 27: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_61 + PIXEL32_61 + PIXEL33_80 + break; + } + case 86: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_61 + PIXEL11_30 + PIXEL12_0 + PIXEL20_61 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 216: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + 
PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 106: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_61 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 30: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_10 + PIXEL11_30 + PIXEL12_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_61 + PIXEL32_61 + PIXEL33_80 + break; + } + case 210: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_61 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + PIXEL20_61 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 120: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 75: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_61 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_61 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 29: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 
+ PIXEL30_80 + PIXEL31_61 + PIXEL32_61 + PIXEL33_80 + break; + } + case 198: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_61 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + PIXEL20_61 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 184: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_61 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 99: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_61 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_61 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 57: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 71: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_32 + PIXEL03_82 + PIXEL10_81 + PIXEL11_31 + PIXEL12_32 + PIXEL13_82 + PIXEL20_61 + PIXEL21_30 + PIXEL22_30 + PIXEL23_61 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 156: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 226: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_61 + PIXEL11_30 + PIXEL12_30 + PIXEL13_61 + PIXEL20_82 + PIXEL21_32 + PIXEL22_31 + PIXEL23_81 + PIXEL30_82 + PIXEL31_32 + PIXEL32_31 + PIXEL33_81 + break; + } + case 60: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 195: + { + PIXEL00_81 + PIXEL01_31 
+ PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_61 + PIXEL20_61 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 102: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_61 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_61 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 153: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 58: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 83: + { + PIXEL00_81 + PIXEL01_31 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL10_81 + PIXEL11_31 + PIXEL20_61 + PIXEL21_30 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 92: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 202: + { + if (Diff(w[4], 
w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_61 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_31 + PIXEL23_81 + PIXEL32_31 + PIXEL33_81 + break; + } + case 78: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + PIXEL02_32 + PIXEL03_82 + PIXEL12_32 + PIXEL13_82 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 154: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 114: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL10_61 + PIXEL11_30 + PIXEL20_82 + PIXEL21_32 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + PIXEL30_82 + PIXEL31_32 + break; + } + case 89: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + if (Diff(w[6], w[8])) + { + 
PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 90: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 55: + case 23: + { + if (Diff(w[2], w[6])) + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_0 + PIXEL03_0 + PIXEL12_0 + PIXEL13_0 + } + else + { + PIXEL00_12 + PIXEL01_14 + PIXEL02_83 + PIXEL03_50 + PIXEL12_70 + PIXEL13_21 + } + PIXEL10_81 + PIXEL11_31 + PIXEL20_60 + PIXEL21_70 + PIXEL22_30 + PIXEL23_10 + PIXEL30_20 + PIXEL31_60 + PIXEL32_61 + PIXEL33_80 + break; + } + case 182: + case 150: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL12_0 + PIXEL13_0 + PIXEL23_32 + PIXEL33_82 + } + else + { + PIXEL02_21 + PIXEL03_50 + PIXEL12_70 + PIXEL13_83 + PIXEL23_13 + PIXEL33_11 + } + PIXEL10_61 + PIXEL11_30 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + break; + } + case 213: + case 212: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + if (Diff(w[6], w[8])) + { + PIXEL03_81 + PIXEL13_31 + PIXEL22_0 + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL03_12 + PIXEL13_14 + PIXEL22_70 + PIXEL23_83 + PIXEL32_21 + PIXEL33_50 + } + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL20_61 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + break; + } + case 241: + case 240: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_61 + PIXEL03_80 + PIXEL10_60 + PIXEL11_70 + PIXEL12_30 + 
PIXEL13_10 + PIXEL20_82 + PIXEL21_32 + if (Diff(w[6], w[8])) + { + PIXEL22_0 + PIXEL23_0 + PIXEL30_82 + PIXEL31_32 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL22_70 + PIXEL23_21 + PIXEL30_11 + PIXEL31_13 + PIXEL32_83 + PIXEL33_50 + } + break; + } + case 236: + case 232: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_60 + PIXEL03_20 + PIXEL10_10 + PIXEL11_30 + PIXEL12_70 + PIXEL13_60 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL21_0 + PIXEL30_0 + PIXEL31_0 + PIXEL32_31 + PIXEL33_81 + } + else + { + PIXEL20_21 + PIXEL21_70 + PIXEL30_50 + PIXEL31_83 + PIXEL32_14 + PIXEL33_12 + } + PIXEL22_31 + PIXEL23_81 + break; + } + case 109: + case 105: + { + if (Diff(w[8], w[4])) + { + PIXEL00_82 + PIXEL10_32 + PIXEL20_0 + PIXEL21_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL00_11 + PIXEL10_13 + PIXEL20_83 + PIXEL21_70 + PIXEL30_50 + PIXEL31_21 + } + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 171: + case 43: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + PIXEL11_0 + PIXEL20_31 + PIXEL30_81 + } + else + { + PIXEL00_50 + PIXEL01_21 + PIXEL10_83 + PIXEL11_70 + PIXEL20_14 + PIXEL30_12 + } + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_61 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 143: + case 15: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL02_32 + PIXEL03_82 + PIXEL10_0 + PIXEL11_0 + } + else + { + PIXEL00_50 + PIXEL01_83 + PIXEL02_13 + PIXEL03_11 + PIXEL10_21 + PIXEL11_70 + } + PIXEL12_32 + PIXEL13_82 + PIXEL20_10 + PIXEL21_30 + PIXEL22_70 + PIXEL23_60 + PIXEL30_80 + PIXEL31_61 + PIXEL32_60 + PIXEL33_20 + break; + } + case 124: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + 
PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 203: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_61 + PIXEL20_10 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 62: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_10 + PIXEL11_30 + PIXEL12_0 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 211: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_10 + PIXEL20_61 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 118: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_61 + PIXEL11_30 + PIXEL12_0 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_10 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 217: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 110: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_10 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 155: + { + if (Diff(w[4], 
w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 188: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 185: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 61: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 157: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 103: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_32 + PIXEL03_82 + PIXEL10_81 + PIXEL11_31 + PIXEL12_32 + PIXEL13_82 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_61 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 227: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_61 + PIXEL20_82 + PIXEL21_32 + PIXEL22_31 + PIXEL23_81 + PIXEL30_82 + PIXEL31_32 + PIXEL32_31 + PIXEL33_81 + break; + } + case 230: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_61 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + PIXEL20_82 + PIXEL21_32 + PIXEL22_31 + PIXEL23_81 + PIXEL30_82 + PIXEL31_32 + PIXEL32_31 + PIXEL33_81 + break; + } + case 199: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_32 
+ PIXEL03_82 + PIXEL10_81 + PIXEL11_31 + PIXEL12_32 + PIXEL13_82 + PIXEL20_61 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 220: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + break; + } + case 158: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL12_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 234: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_61 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_31 + PIXEL23_81 + PIXEL32_31 + PIXEL33_81 + break; + } + case 242: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL10_61 + PIXEL11_30 + PIXEL20_82 + PIXEL21_32 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_82 + PIXEL31_32 + break; + } + case 59: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + if 
(Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL11_0 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 121: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 87: + { + PIXEL00_81 + PIXEL01_31 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_81 + PIXEL11_31 + PIXEL12_0 + PIXEL20_61 + PIXEL21_30 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 79: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_32 + PIXEL03_82 + PIXEL11_0 + PIXEL12_32 + PIXEL13_82 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 122: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + if (Diff(w[6], w[8])) + 
{ + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 94: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL12_0 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 218: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + break; + } + case 91: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL11_0 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 229: + { + 
PIXEL00_20 + PIXEL01_60 + PIXEL02_60 + PIXEL03_20 + PIXEL10_60 + PIXEL11_70 + PIXEL12_70 + PIXEL13_60 + PIXEL20_82 + PIXEL21_32 + PIXEL22_31 + PIXEL23_81 + PIXEL30_82 + PIXEL31_32 + PIXEL32_31 + PIXEL33_81 + break; + } + case 167: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_32 + PIXEL03_82 + PIXEL10_81 + PIXEL11_31 + PIXEL12_32 + PIXEL13_82 + PIXEL20_60 + PIXEL21_70 + PIXEL22_70 + PIXEL23_60 + PIXEL30_20 + PIXEL31_60 + PIXEL32_60 + PIXEL33_20 + break; + } + case 173: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL10_32 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + PIXEL20_31 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL30_81 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 181: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + PIXEL03_81 + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL13_31 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL23_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + PIXEL33_82 + break; + } + case 186: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 115: + { + PIXEL00_81 + PIXEL01_31 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL10_81 + PIXEL11_31 + PIXEL20_82 + PIXEL21_32 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + PIXEL30_82 + PIXEL31_32 + break; + } + case 93: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } 
+ else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + PIXEL33_20 + } + break; + } + case 206: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + PIXEL02_32 + PIXEL03_82 + PIXEL12_32 + PIXEL13_82 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_31 + PIXEL23_81 + PIXEL32_31 + PIXEL33_81 + break; + } + case 205: + case 201: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL10_32 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + if (Diff(w[8], w[4])) + { + PIXEL20_10 + PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + } + else + { + PIXEL20_12 + PIXEL21_0 + PIXEL30_20 + PIXEL31_11 + } + PIXEL22_31 + PIXEL23_81 + PIXEL32_31 + PIXEL33_81 + break; + } + case 174: + case 46: + { + if (Diff(w[4], w[2])) + { + PIXEL00_80 + PIXEL01_10 + PIXEL10_10 + PIXEL11_30 + } + else + { + PIXEL00_20 + PIXEL01_12 + PIXEL10_11 + PIXEL11_0 + } + PIXEL02_32 + PIXEL03_82 + PIXEL12_32 + PIXEL13_82 + PIXEL20_31 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL30_81 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 179: + case 147: + { + PIXEL00_81 + PIXEL01_31 + if (Diff(w[2], w[6])) + { + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + } + else + { + PIXEL02_11 + PIXEL03_20 + PIXEL12_0 + PIXEL13_12 + } + PIXEL10_81 + PIXEL11_31 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL23_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + PIXEL33_82 + break; + } + case 117: + case 116: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + PIXEL03_81 + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL13_31 + PIXEL20_82 + PIXEL21_32 + if (Diff(w[6], w[8])) + { + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + } + else + { + PIXEL22_0 + PIXEL23_11 + PIXEL32_12 + 
PIXEL33_20 + } + PIXEL30_82 + PIXEL31_32 + break; + } + case 189: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 231: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_32 + PIXEL03_82 + PIXEL10_81 + PIXEL11_31 + PIXEL12_32 + PIXEL13_82 + PIXEL20_82 + PIXEL21_32 + PIXEL22_31 + PIXEL23_81 + PIXEL30_82 + PIXEL31_32 + PIXEL32_31 + PIXEL33_81 + break; + } + case 126: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_10 + PIXEL11_30 + PIXEL12_0 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 219: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_10 + PIXEL20_10 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 125: + { + if (Diff(w[8], w[4])) + { + PIXEL00_82 + PIXEL10_32 + PIXEL20_0 + PIXEL21_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL00_11 + PIXEL10_13 + PIXEL20_83 + PIXEL21_70 + PIXEL30_50 + PIXEL31_21 + } + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 221: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + if (Diff(w[6], w[8])) + { + PIXEL03_81 + PIXEL13_31 + PIXEL22_0 + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL03_12 + PIXEL13_14 + PIXEL22_70 + PIXEL23_83 + PIXEL32_21 + PIXEL33_50 + } + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL20_10 + 
PIXEL21_30 + PIXEL30_80 + PIXEL31_10 + break; + } + case 207: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL02_32 + PIXEL03_82 + PIXEL10_0 + PIXEL11_0 + } + else + { + PIXEL00_50 + PIXEL01_83 + PIXEL02_13 + PIXEL03_11 + PIXEL10_21 + PIXEL11_70 + } + PIXEL12_32 + PIXEL13_82 + PIXEL20_10 + PIXEL21_30 + PIXEL22_31 + PIXEL23_81 + PIXEL30_80 + PIXEL31_10 + PIXEL32_31 + PIXEL33_81 + break; + } + case 238: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_32 + PIXEL03_82 + PIXEL10_10 + PIXEL11_30 + PIXEL12_32 + PIXEL13_82 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL21_0 + PIXEL30_0 + PIXEL31_0 + PIXEL32_31 + PIXEL33_81 + } + else + { + PIXEL20_21 + PIXEL21_70 + PIXEL30_50 + PIXEL31_83 + PIXEL32_14 + PIXEL33_12 + } + PIXEL22_31 + PIXEL23_81 + break; + } + case 190: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL12_0 + PIXEL13_0 + PIXEL23_32 + PIXEL33_82 + } + else + { + PIXEL02_21 + PIXEL03_50 + PIXEL12_70 + PIXEL13_83 + PIXEL23_13 + PIXEL33_11 + } + PIXEL10_10 + PIXEL11_30 + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + break; + } + case 187: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + PIXEL11_0 + PIXEL20_31 + PIXEL30_81 + } + else + { + PIXEL00_50 + PIXEL01_21 + PIXEL10_83 + PIXEL11_70 + PIXEL20_14 + PIXEL30_12 + } + PIXEL02_10 + PIXEL03_80 + PIXEL12_30 + PIXEL13_10 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 243: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_10 + PIXEL03_80 + PIXEL10_81 + PIXEL11_31 + PIXEL12_30 + PIXEL13_10 + PIXEL20_82 + PIXEL21_32 + if (Diff(w[6], w[8])) + { + PIXEL22_0 + PIXEL23_0 + PIXEL30_82 + PIXEL31_32 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL22_70 + PIXEL23_21 + PIXEL30_11 + PIXEL31_13 + PIXEL32_83 + PIXEL33_50 + } + break; + } + case 119: + { + if (Diff(w[2], w[6])) + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_0 + PIXEL03_0 + PIXEL12_0 + PIXEL13_0 + } + else + { + PIXEL00_12 + PIXEL01_14 
+ PIXEL02_83 + PIXEL03_50 + PIXEL12_70 + PIXEL13_21 + } + PIXEL10_81 + PIXEL11_31 + PIXEL20_82 + PIXEL21_32 + PIXEL22_30 + PIXEL23_10 + PIXEL30_82 + PIXEL31_32 + PIXEL32_10 + PIXEL33_80 + break; + } + case 237: + case 233: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_60 + PIXEL03_20 + PIXEL10_32 + PIXEL11_32 + PIXEL12_70 + PIXEL13_60 + PIXEL20_0 + PIXEL21_0 + PIXEL22_31 + PIXEL23_81 + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + PIXEL32_31 + PIXEL33_81 + break; + } + case 175: + case 47: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + PIXEL02_32 + PIXEL03_82 + PIXEL10_0 + PIXEL11_0 + PIXEL12_32 + PIXEL13_82 + PIXEL20_31 + PIXEL21_31 + PIXEL22_70 + PIXEL23_60 + PIXEL30_81 + PIXEL31_81 + PIXEL32_60 + PIXEL33_20 + break; + } + case 183: + case 151: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL10_81 + PIXEL11_31 + PIXEL12_0 + PIXEL13_0 + PIXEL20_60 + PIXEL21_70 + PIXEL22_32 + PIXEL23_32 + PIXEL30_20 + PIXEL31_60 + PIXEL32_82 + PIXEL33_82 + break; + } + case 245: + case 244: + { + PIXEL00_20 + PIXEL01_60 + PIXEL02_81 + PIXEL03_81 + PIXEL10_60 + PIXEL11_70 + PIXEL12_31 + PIXEL13_31 + PIXEL20_82 + PIXEL21_32 + PIXEL22_0 + PIXEL23_0 + PIXEL30_82 + PIXEL31_32 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + PIXEL33_20 + } + break; + } + case 250: + { + PIXEL00_80 + PIXEL01_10 + PIXEL02_10 + PIXEL03_80 + PIXEL10_10 + PIXEL11_30 + PIXEL12_30 + PIXEL13_10 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + break; + } + case 123: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + 
PIXEL12_30 + PIXEL13_10 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 95: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL11_0 + PIXEL12_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_30 + PIXEL23_10 + PIXEL30_80 + PIXEL31_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 222: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_10 + PIXEL11_30 + PIXEL12_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 252: + { + PIXEL00_80 + PIXEL01_61 + PIXEL02_81 + PIXEL03_81 + PIXEL10_10 + PIXEL11_30 + PIXEL12_31 + PIXEL13_31 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_0 + PIXEL23_0 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + PIXEL33_20 + } + break; + } + case 249: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_61 + PIXEL03_80 + PIXEL10_32 + PIXEL11_32 + PIXEL12_30 + PIXEL13_10 + PIXEL20_0 + PIXEL21_0 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + break; + } + case 235: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_61 + PIXEL20_0 + PIXEL21_0 
+ PIXEL22_31 + PIXEL23_81 + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + PIXEL32_31 + PIXEL33_81 + break; + } + case 111: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + PIXEL02_32 + PIXEL03_82 + PIXEL10_0 + PIXEL11_0 + PIXEL12_32 + PIXEL13_82 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_61 + PIXEL32_10 + PIXEL33_80 + break; + } + case 63: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_0 + PIXEL11_0 + PIXEL12_0 + PIXEL20_31 + PIXEL21_31 + PIXEL22_30 + PIXEL23_10 + PIXEL30_81 + PIXEL31_81 + PIXEL32_61 + PIXEL33_80 + break; + } + case 159: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL11_0 + PIXEL12_0 + PIXEL13_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_32 + PIXEL23_32 + PIXEL30_80 + PIXEL31_61 + PIXEL32_82 + PIXEL33_82 + break; + } + case 215: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL10_81 + PIXEL11_31 + PIXEL12_0 + PIXEL13_0 + PIXEL20_61 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 246: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_61 + PIXEL11_30 + PIXEL12_0 + PIXEL20_82 + PIXEL21_32 + PIXEL22_0 + PIXEL23_0 + PIXEL30_82 + PIXEL31_32 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + 
PIXEL33_20 + } + break; + } + case 254: + { + PIXEL00_80 + PIXEL01_10 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_10 + PIXEL11_30 + PIXEL12_0 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_0 + PIXEL23_0 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + PIXEL33_20 + } + break; + } + case 253: + { + PIXEL00_82 + PIXEL01_82 + PIXEL02_81 + PIXEL03_81 + PIXEL10_32 + PIXEL11_32 + PIXEL12_31 + PIXEL13_31 + PIXEL20_0 + PIXEL21_0 + PIXEL22_0 + PIXEL23_0 + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + PIXEL33_20 + } + break; + } + case 251: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_10 + PIXEL03_80 + PIXEL11_0 + PIXEL12_30 + PIXEL13_10 + PIXEL20_0 + PIXEL21_0 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + break; + } + case 239: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + PIXEL02_32 + PIXEL03_82 + PIXEL10_0 + PIXEL11_0 + PIXEL12_32 + PIXEL13_82 + PIXEL20_0 + PIXEL21_0 + PIXEL22_31 + PIXEL23_81 + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + PIXEL32_31 + PIXEL33_81 + break; + } + case 127: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + if (Diff(w[2], w[6])) + { + PIXEL02_0 + PIXEL03_0 + PIXEL13_0 + } + else + { + PIXEL02_50 + PIXEL03_50 + PIXEL13_50 + } + PIXEL10_0 + PIXEL11_0 + PIXEL12_0 + if (Diff(w[8], w[4])) + { + PIXEL20_0 + PIXEL30_0 + PIXEL31_0 + } + else + { + PIXEL20_50 + 
PIXEL30_50 + PIXEL31_50 + } + PIXEL21_0 + PIXEL22_30 + PIXEL23_10 + PIXEL32_10 + PIXEL33_80 + break; + } + case 191: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL10_0 + PIXEL11_0 + PIXEL12_0 + PIXEL13_0 + PIXEL20_31 + PIXEL21_31 + PIXEL22_32 + PIXEL23_32 + PIXEL30_81 + PIXEL31_81 + PIXEL32_82 + PIXEL33_82 + break; + } + case 223: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + PIXEL01_0 + PIXEL10_0 + } + else + { + PIXEL00_50 + PIXEL01_50 + PIXEL10_50 + } + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL11_0 + PIXEL12_0 + PIXEL13_0 + PIXEL20_10 + PIXEL21_30 + PIXEL22_0 + if (Diff(w[6], w[8])) + { + PIXEL23_0 + PIXEL32_0 + PIXEL33_0 + } + else + { + PIXEL23_50 + PIXEL32_50 + PIXEL33_50 + } + PIXEL30_80 + PIXEL31_10 + break; + } + case 247: + { + PIXEL00_81 + PIXEL01_31 + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL10_81 + PIXEL11_31 + PIXEL12_0 + PIXEL13_0 + PIXEL20_82 + PIXEL21_32 + PIXEL22_0 + PIXEL23_0 + PIXEL30_82 + PIXEL31_32 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + PIXEL33_20 + } + break; + } + case 255: + { + if (Diff(w[4], w[2])) + { + PIXEL00_0 + } + else + { + PIXEL00_20 + } + PIXEL01_0 + PIXEL02_0 + if (Diff(w[2], w[6])) + { + PIXEL03_0 + } + else + { + PIXEL03_20 + } + PIXEL10_0 + PIXEL11_0 + PIXEL12_0 + PIXEL13_0 + PIXEL20_0 + PIXEL21_0 + PIXEL22_0 + PIXEL23_0 + if (Diff(w[8], w[4])) + { + PIXEL30_0 + } + else + { + PIXEL30_20 + } + PIXEL31_0 + PIXEL32_0 + if (Diff(w[6], w[8])) + { + PIXEL33_0 + } + else + { + PIXEL33_20 + } + break; + } + } diff --git a/Source/GlideHQ/TextureFilters_lq2x.h b/Source/GlideHQ/TextureFilters_lq2x.h new file mode 100644 index 000000000..b5318ab81 --- /dev/null +++ b/Source/GlideHQ/TextureFilters_lq2x.h @@ -0,0 +1,1307 @@ +/* +Copyright (C) 2003 Rice1964 + +This program is free 
software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +*/ + +/* Copyright (C) 2007 Hiroshi Morii + * Modified for the Texture Filtering library + */ + +case 0 : +case 2 : +case 4 : +case 6 : +case 8 : +case 12 : +case 16 : +case 20 : +case 24 : +case 28 : +case 32 : +case 34 : +case 36 : +case 38 : +case 40 : +case 44 : +case 48 : +case 52 : +case 56 : +case 60 : +case 64 : +case 66 : +case 68 : +case 70 : +case 96 : +case 98 : +case 100 : +case 102 : +case 128 : +case 130 : +case 132 : +case 134 : +case 136 : +case 140 : +case 144 : +case 148 : +case 152 : +case 156 : +case 160 : +case 162 : +case 164 : +case 166 : +case 168 : +case 172 : +case 176 : +case 180 : +case 184 : +case 188 : +case 192 : +case 194 : +case 196 : +case 198 : +case 224 : +case 226 : +case 228 : +case 230 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + P3 = IC(0); +} break; +case 1 : +case 5 : +case 9 : +case 13 : +case 17 : +case 21 : +case 25 : +case 29 : +case 33 : +case 37 : +case 41 : +case 45 : +case 49 : +case 53 : +case 57 : +case 61 : +case 65 : +case 69 : +case 97 : +case 101 : +case 129 : +case 133 : +case 137 : +case 141 : +case 145 : +case 149 : +case 153 : +case 157 : +case 161 : +case 165 : +case 169 : +case 173 : +case 177 : +case 181 : +case 185 : +case 189 : +case 193 : +case 197 : +case 225 : +case 229 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + P3 = IC(1); +} 
break; +case 3 : +case 35 : +case 67 : +case 99 : +case 131 : +case 163 : +case 195 : +case 227 : +{ + P0 = IC(2); + P1 = IC(2); + P2 = IC(2); + P3 = IC(2); +} break; +case 7 : +case 39 : +case 71 : +case 103 : +case 135 : +case 167 : +case 199 : +case 231 : +{ + P0 = IC(3); + P1 = IC(3); + P2 = IC(3); + P3 = IC(3); +} break; +case 10 : +case 138 : +{ + P1 = IC(0); + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I211(0, 1, 3); + } +} break; +case 11 : +case 27 : +case 75 : +case 139 : +case 155 : +case 203 : +{ + P1 = IC(2); + P2 = IC(2); + P3 = IC(2); + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 14 : +case 142 : +{ + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + P1 = IC(0); + } else { + P0 = I332(1, 3, 0); + P1 = I31(0, 1); + } +} break; +case 15 : +case 143 : +case 207 : +{ + P2 = IC(4); + P3 = IC(4); + if (HQ2X_MUL) { + P0 = IC(4); + P1 = IC(4); + } else { + P0 = I332(1, 3, 4); + P1 = I31(4, 1); + } +} break; +case 18 : +case 22 : +case 30 : +case 50 : +case 54 : +case 62 : +case 86 : +case 118 : +{ + P0 = IC(0); + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 19 : +case 51 : +{ + P2 = IC(2); + P3 = IC(2); + if (HQ2X_MUR) { + P0 = IC(2); + P1 = IC(2); + } else { + P0 = I31(2, 1); + P1 = I332(1, 5, 2); + } +} break; +case 23 : +case 55 : +case 119 : +{ + P2 = IC(3); + P3 = IC(3); + if (HQ2X_MUR) { + P0 = IC(3); + P1 = IC(3); + } else { + P0 = I31(3, 1); + P1 = I332(1, 5, 3); + } +} break; +case 26 : +{ + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I211(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 31 : +case 95 : +{ + P2 = IC(4); + P3 = IC(4); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 42 : +case 170 : +{ + P1 = IC(0); + P3 
= IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + P2 = IC(0); + } else { + P0 = I332(1, 3, 0); + P2 = I31(0, 3); + } +} break; +case 43 : +case 171 : +case 187 : +{ + P1 = IC(2); + P3 = IC(2); + if (HQ2X_MUL) { + P0 = IC(2); + P2 = IC(2); + } else { + P0 = I332(1, 3, 2); + P2 = I31(2, 3); + } +} break; +case 46 : +case 174 : +{ + P1 = IC(0); + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } +} break; +case 47 : +case 175 : +{ + P1 = IC(4); + P2 = IC(4); + P3 = IC(4); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 58 : +case 154 : +case 186 : +{ + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 59 : +{ + P2 = IC(2); + P3 = IC(2); + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 63 : +{ + P2 = IC(4); + P3 = IC(4); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 72 : +case 76 : +case 104 : +case 106 : +case 108 : +case 110 : +case 120 : +case 124 : +{ + P0 = IC(0); + P1 = IC(0); + P3 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } +} break; +case 73 : +case 77 : +case 105 : +case 109 : +case 125 : +{ + P1 = IC(1); + P3 = IC(1); + if (HQ2X_MDL) { + P0 = IC(1); + P2 = IC(1); + } else { + P0 = I31(1, 3); + P2 = I332(3, 7, 1); + } +} break; +case 74 : +{ + P1 = IC(0); + P3 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I211(0, 1, 3); + } +} break; +case 78 : +case 202 : +case 206 : +{ + P1 = IC(0); + P3 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } 
+} break; +case 79 : +{ + P1 = IC(4); + P3 = IC(4); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I611(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } +} break; +case 80 : +case 208 : +case 210 : +case 216 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } +} break; +case 81 : +case 209 : +case 217 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I211(1, 5, 7); + } +} break; +case 82 : +case 214 : +case 222 : +{ + P0 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 83 : +case 115 : +{ + P0 = IC(2); + P2 = IC(2); + if (HQ2X_MDR) { + P3 = IC(2); + } else { + P3 = I611(2, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 84 : +case 212 : +{ + P0 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P1 = IC(0); + P3 = IC(0); + } else { + P1 = I31(0, 5); + P3 = I332(5, 7, 0); + } +} break; +case 85 : +case 213 : +case 221 : +{ + P0 = IC(1); + P2 = IC(1); + if (HQ2X_MDR) { + P1 = IC(1); + P3 = IC(1); + } else { + P1 = I31(1, 5); + P3 = I332(5, 7, 1); + } +} break; +case 87 : +{ + P0 = IC(3); + P2 = IC(3); + if (HQ2X_MDR) { + P3 = IC(3); + } else { + P3 = I611(3, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(3); + } else { + P1 = I211(3, 1, 5); + } +} break; +case 88 : +case 248 : +case 250 : +{ + P0 = IC(0); + P1 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } +} break; +case 89 : +case 93 : +{ + P0 = IC(1); + P1 = IC(1); + if (HQ2X_MDL) { + P2 = IC(1); + } else { + P2 = I611(1, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I611(1, 5, 7); + } +} break; +case 90 : +{ + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (HQ2X_MDR) { + P3 
= IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 91 : +{ + if (HQ2X_MDL) { + P2 = IC(2); + } else { + P2 = I611(2, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(2); + } else { + P3 = I611(2, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 92 : +{ + P0 = IC(0); + P1 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } +} break; +case 94 : +{ + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 107 : +case 123 : +{ + P1 = IC(2); + P3 = IC(2); + if (HQ2X_MDL) { + P2 = IC(2); + } else { + P2 = I211(2, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 111 : +{ + P1 = IC(4); + P3 = IC(4); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 112 : +case 240 : +{ + P0 = IC(0); + P1 = IC(0); + if (HQ2X_MDR) { + P2 = IC(0); + P3 = IC(0); + } else { + P2 = I31(0, 7); + P3 = I332(5, 7, 0); + } +} break; +case 113 : +case 241 : +{ + P0 = IC(1); + P1 = IC(1); + if (HQ2X_MDR) { + P2 = IC(1); + P3 = IC(1); + } else { + P2 = I31(1, 7); + P3 = I332(5, 7, 1); + } +} break; +case 114 : +{ + P0 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 116 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else 
{ + P3 = I611(0, 5, 7); + } +} break; +case 117 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I611(1, 5, 7); + } +} break; +case 121 : +{ + P0 = IC(1); + P1 = IC(1); + if (HQ2X_MDL) { + P2 = IC(1); + } else { + P2 = I211(1, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I611(1, 5, 7); + } +} break; +case 122 : +{ + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I611(0, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 126 : +{ + P0 = IC(0); + P3 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 127 : +{ + P3 = IC(4); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I211(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I211(4, 1, 5); + } +} break; +case 146 : +case 150 : +case 178 : +case 182 : +case 190 : +{ + P0 = IC(0); + P2 = IC(0); + if (HQ2X_MUR) { + P1 = IC(0); + P3 = IC(0); + } else { + P1 = I332(1, 5, 0); + P3 = I31(0, 5); + } +} break; +case 147 : +case 179 : +{ + P0 = IC(2); + P2 = IC(2); + P3 = IC(2); + if (HQ2X_MUR) { + P1 = IC(2); + } else { + P1 = I611(2, 1, 5); + } +} break; +case 151 : +case 183 : +{ + P0 = IC(3); + P2 = IC(3); + P3 = IC(3); + if (HQ2X_MUR) { + P1 = IC(3); + } else { + P1 = I1411(3, 1, 5); + } +} break; +case 158 : +{ + P2 = IC(0); + P3 = IC(0); + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 159 : +{ + P2 = IC(4); + P3 = IC(4); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 191 : 
+{ + P2 = IC(4); + P3 = IC(4); + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 200 : +case 204 : +case 232 : +case 236 : +case 238 : +{ + P0 = IC(0); + P1 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + P3 = IC(0); + } else { + P2 = I332(3, 7, 0); + P3 = I31(0, 7); + } +} break; +case 201 : +case 205 : +{ + P0 = IC(1); + P1 = IC(1); + P3 = IC(1); + if (HQ2X_MDL) { + P2 = IC(1); + } else { + P2 = I611(1, 3, 7); + } +} break; +case 211 : +{ + P0 = IC(2); + P1 = IC(2); + P2 = IC(2); + if (HQ2X_MDR) { + P3 = IC(2); + } else { + P3 = I211(2, 5, 7); + } +} break; +case 215 : +{ + P0 = IC(3); + P2 = IC(3); + if (HQ2X_MDR) { + P3 = IC(3); + } else { + P3 = I211(3, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(3); + } else { + P1 = I1411(3, 1, 5); + } +} break; +case 218 : +{ + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 219 : +{ + P1 = IC(2); + P2 = IC(2); + if (HQ2X_MDR) { + P3 = IC(2); + } else { + P3 = I211(2, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 220 : +{ + P0 = IC(0); + P1 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I611(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } +} break; +case 223 : +{ + P2 = IC(4); + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I211(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I211(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; +case 233 : +case 237 : +{ + P0 = IC(1); + P1 = IC(1); + P3 = IC(1); + if (HQ2X_MDL) { + P2 = IC(1); + } else { + P2 = I1411(1, 3, 7); + } +} break; +case 234 : +{ + P1 = IC(0); + P3 = IC(0); + if (HQ2X_MDL) { 
+ P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(0); + } else { + P0 = I611(0, 1, 3); + } +} break; +case 235 : +{ + P1 = IC(2); + P3 = IC(2); + if (HQ2X_MDL) { + P2 = IC(2); + } else { + P2 = I1411(2, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 239 : +{ + P1 = IC(4); + P3 = IC(4); + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } +} break; +case 242 : +{ + P0 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I211(0, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I611(0, 1, 5); + } +} break; +case 243 : +{ + P0 = IC(2); + P1 = IC(2); + if (HQ2X_MDR) { + P2 = IC(2); + P3 = IC(2); + } else { + P2 = I31(2, 7); + P3 = I332(5, 7, 2); + } +} break; +case 244 : +{ + P0 = IC(0); + P1 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } +} break; +case 245 : +{ + P0 = IC(1); + P1 = IC(1); + P2 = IC(1); + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I1411(1, 5, 7); + } +} break; +case 246 : +{ + P0 = IC(0); + P2 = IC(0); + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 247 : +{ + P0 = IC(3); + P2 = IC(3); + if (HQ2X_MDR) { + P3 = IC(3); + } else { + P3 = I1411(3, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(3); + } else { + P1 = I1411(3, 1, 5); + } +} break; +case 249 : +{ + P0 = IC(1); + P1 = IC(1); + if (HQ2X_MDL) { + P2 = IC(1); + } else { + P2 = I1411(1, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I211(1, 5, 7); + } +} break; +case 251 : +{ + P1 = IC(2); + if (HQ2X_MDL) { + P2 = IC(2); + } else { + P2 = I1411(2, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(2); + } else { + P3 = I211(2, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(2); + } else { + P0 = I211(2, 1, 3); + } +} break; +case 252 : +{ + P0 = IC(0); + P1 = 
IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } +} break; +case 253 : +{ + P0 = IC(1); + P1 = IC(1); + if (HQ2X_MDL) { + P2 = IC(1); + } else { + P2 = I1411(1, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(1); + } else { + P3 = I1411(1, 5, 7); + } +} break; +case 254 : +{ + P0 = IC(0); + if (HQ2X_MDL) { + P2 = IC(0); + } else { + P2 = I211(0, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(0); + } else { + P3 = I1411(0, 5, 7); + } + if (HQ2X_MUR) { + P1 = IC(0); + } else { + P1 = I211(0, 1, 5); + } +} break; +case 255 : +{ + if (HQ2X_MDL) { + P2 = IC(4); + } else { + P2 = I1411(4, 3, 7); + } + if (HQ2X_MDR) { + P3 = IC(4); + } else { + P3 = I1411(4, 5, 7); + } + if (HQ2X_MUL) { + P0 = IC(4); + } else { + P0 = I1411(4, 1, 3); + } + if (HQ2X_MUR) { + P1 = IC(4); + } else { + P1 = I1411(4, 1, 5); + } +} break; diff --git a/Source/GlideHQ/TxCache.cpp b/Source/GlideHQ/TxCache.cpp new file mode 100644 index 000000000..42f434551 --- /dev/null +++ b/Source/GlideHQ/TxCache.cpp @@ -0,0 +1,433 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifdef WIN32 +#pragma warning(disable: 4786) +#endif + +#include "TxCache.h" +#include "TxDbg.h" +#include +#include + +TxCache::~TxCache() +{ + /* free memory, clean up, etc */ + clear(); + + delete _txUtil; +} + +TxCache::TxCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident, + dispInfoFuncExt callback) +{ + _txUtil = new TxUtil(); + + _options = options; + _cacheSize = cachesize; + _callback = callback; + _totalSize = 0; + + /* save path name */ + if (path) + _path.assign(path); + + /* save ROM name */ + if (ident) + _ident.assign(ident); + + /* zlib memory buffers to (de)compress hires textures */ + if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) { + _gzdest0 = TxMemBuf::getInstance()->get(0); + _gzdest1 = TxMemBuf::getInstance()->get(1); + _gzdestLen = (TxMemBuf::getInstance()->size_of(0) < TxMemBuf::getInstance()->size_of(1)) ? + TxMemBuf::getInstance()->size_of(0) : TxMemBuf::getInstance()->size_of(1); + + if (!_gzdest0 || !_gzdest1 || !_gzdestLen) { + _options &= ~(GZ_TEXCACHE|GZ_HIRESTEXCACHE); + _gzdest0 = NULL; + _gzdest1 = NULL; + _gzdestLen = 0; + } + } +} + +boolean +TxCache::add(uint64 checksum, GHQTexInfo *info, int dataSize) +{ + /* NOTE: dataSize must be provided if info->data is zlib compressed. */ + + if (!checksum || !info->data) return 0; + + uint8 *dest = info->data; + uint16 format = info->format; + + if (!dataSize) { + dataSize = _txUtil->sizeofTx(info->width, info->height, info->format); + + if (!dataSize) return 0; + + if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) { + /* zlib compress it. compression level:1 (best speed) */ + uint32 destLen = _gzdestLen; + dest = (dest == _gzdest0) ? 
_gzdest1 : _gzdest0; + if (compress2(dest, &destLen, info->data, dataSize, 1) != Z_OK) { + dest = info->data; + DBG_INFO(80, L"Error: zlib compression failed!\n"); + } else { + DBG_INFO(80, L"zlib compressed: %.02fkb->%.02fkb\n", (float)dataSize/1000, (float)destLen/1000); + dataSize = destLen; + format |= GR_TEXFMT_GZ; + } + } + } + + /* if cache size exceeds limit, remove old cache */ + if (_cacheSize > 0) { + _totalSize += dataSize; + if ((_totalSize > _cacheSize) && !_cachelist.empty()) { + /* _cachelist is arranged so that frequently used textures are in the back */ + std::list::iterator itList = _cachelist.begin(); + while (itList != _cachelist.end()) { + /* find it in _cache */ + std::map::iterator itMap = _cache.find(*itList); + if (itMap != _cache.end()) { + /* yep we have it. remove it. */ + _totalSize -= (*itMap).second->size; + free((*itMap).second->info.data); + delete (*itMap).second; + _cache.erase(itMap); + } + itList++; + + /* check if memory cache has enough space */ + if (_totalSize <= _cacheSize) + break; + } + /* remove from _cachelist */ + _cachelist.erase(_cachelist.begin(), itList); + + DBG_INFO(80, L"+++++++++\n"); + } + _totalSize -= dataSize; + } + + /* cache it */ + uint8 *tmpdata = (uint8*)malloc(dataSize); + if (tmpdata) { + TXCACHE *txCache = new TXCACHE; + if (txCache) { + /* we can directly write as we filter, but for now we get away + * with doing memcpy after all the filtering is done. + */ + memcpy(tmpdata, dest, dataSize); + + /* copy it */ + memcpy(&txCache->info, info, sizeof(GHQTexInfo)); + txCache->info.data = tmpdata; + txCache->info.format = format; + txCache->size = dataSize; + + /* add to cache */ + if (_cacheSize > 0) { + _cachelist.push_back(checksum); + txCache->it = --(_cachelist.end()); + } + /* _cache[checksum] = txCache; */ + _cache.insert(std::map::value_type(checksum, txCache)); + +#ifdef DEBUG + DBG_INFO(80, L"[%5d] added!! 
crc:%08X %08X %d x %d gfmt:%x total:%.02fmb\n", + _cache.size(), (uint32)(checksum >> 32), (uint32)(checksum & 0xffffffff), + info->width, info->height, info->format, (float)_totalSize/1000000); + + DBG_INFO(80, L"smalllodlog2:%d largelodlog2:%d aspectratiolog2:%d\n", + txCache->info.smallLodLog2, txCache->info.largeLodLog2, txCache->info.aspectRatioLog2); + + if (info->tiles) { + DBG_INFO(80, L"tiles:%d un-tiled size:%d x %d\n", info->tiles, info->untiled_width, info->untiled_height); + } + + if (_cacheSize > 0) { + DBG_INFO(80, L"cache max config:%.02fmb\n", (float)_cacheSize/1000000); + + if (_cache.size() != _cachelist.size()) { + DBG_INFO(80, L"Error: cache/cachelist mismatch! (%d/%d)\n", _cache.size(), _cachelist.size()); + } + } +#endif + + /* total cache size */ + _totalSize += dataSize; + + return 1; + } + free(tmpdata); + } + + return 0; +} + +boolean +TxCache::get(uint64 checksum, GHQTexInfo *info) +{ + if (!checksum || _cache.empty()) return 0; + + /* find a match in cache */ + std::map::iterator itMap = _cache.find(checksum); + if (itMap != _cache.end()) { + /* yep, we've got it. */ + memcpy(info, &(((*itMap).second)->info), sizeof(GHQTexInfo)); + + /* push it to the back of the list */ + if (_cacheSize > 0) { + _cachelist.erase(((*itMap).second)->it); + _cachelist.push_back(checksum); + ((*itMap).second)->it = --(_cachelist.end()); + } + + /* zlib decompress it */ + if (info->format & GR_TEXFMT_GZ) { + uint32 destLen = _gzdestLen; + uint8 *dest = (_gzdest0 == info->data) ? 
_gzdest1 : _gzdest0; + if (uncompress(dest, &destLen, info->data, ((*itMap).second)->size) != Z_OK) { + DBG_INFO(80, L"Error: zlib decompression failed!\n"); + return 0; + } + info->data = dest; + info->format &= ~GR_TEXFMT_GZ; + DBG_INFO(80, L"zlib decompressed: %.02fkb->%.02fkb\n", (float)(((*itMap).second)->size)/1000, (float)destLen/1000); + } + + return 1; + } + + return 0; +} + +boolean +TxCache::save(const wchar_t *path, const wchar_t *filename, int config) +{ + if (!_cache.empty()) { + /* dump cache to disk */ + char cbuf[MAX_PATH]; + + CPath cachepath(stdstr().FromUTF16(path),""); + cachepath.CreateDirectory(); + + /* Ugly hack to enable fopen/gzopen in Win9x */ +#ifdef WIN32 + wchar_t curpath[MAX_PATH]; + GETCWD(MAX_PATH, curpath); + cachepath.ChangeDirectory(); +#else + char curpath[MAX_PATH]; + wcstombs(cbuf, cachepath.string().c_str(), MAX_PATH); + GETCWD(MAX_PATH, curpath); + CHDIR(cbuf); +#endif + +#ifdef tofix + wcstombs(cbuf, filename, MAX_PATH); + + gzFile gzfp = gzopen(cbuf, "wb1"); + DBG_INFO(80, L"gzfp:%x file:%ls\n", gzfp, filename); + if (gzfp) { + /* write header to determine config match */ + gzwrite(gzfp, &config, 4); + + std::map::iterator itMap = _cache.begin(); + while (itMap != _cache.end()) { + uint8 *dest = (*itMap).second->info.data; + uint32 destLen = (*itMap).second->size; + uint16 format = (*itMap).second->info.format; + + /* to keep things simple, we save the texture data in a zlib uncompressed state. */ + /* sigh... for those who cannot wait the extra few seconds. changed to keep + * texture data in a zlib compressed state. if the GZ_TEXCACHE or GZ_HIRESTEXCACHE + * option is toggled, the cache will need to be rebuilt. 
+ */ + /*if (format & GR_TEXFMT_GZ) { + dest = _gzdest0; + destLen = _gzdestLen; + if (dest && destLen) { + if (uncompress(dest, &destLen, (*itMap).second->info.data, (*itMap).second->size) != Z_OK) { + dest = NULL; + destLen = 0; + } + format &= ~GR_TEXFMT_GZ; + } + }*/ + + if (dest && destLen) { + /* texture checksum */ + gzwrite(gzfp, &((*itMap).first), 8); + + /* other texture info */ + gzwrite(gzfp, &((*itMap).second->info.width), 4); + gzwrite(gzfp, &((*itMap).second->info.height), 4); + gzwrite(gzfp, &format, 2); + + gzwrite(gzfp, &((*itMap).second->info.smallLodLog2), 4); + gzwrite(gzfp, &((*itMap).second->info.largeLodLog2), 4); + gzwrite(gzfp, &((*itMap).second->info.aspectRatioLog2), 4); + + gzwrite(gzfp, &((*itMap).second->info.tiles), 4); + gzwrite(gzfp, &((*itMap).second->info.untiled_width), 4); + gzwrite(gzfp, &((*itMap).second->info.untiled_height), 4); + + gzwrite(gzfp, &((*itMap).second->info.is_hires_tex), 1); + + gzwrite(gzfp, &destLen, 4); + gzwrite(gzfp, dest, destLen); + } + + itMap++; + + /* not ready yet */ + /*if (_callback) + (*_callback)(L"Total textures saved to HDD: %d\n", std::distance(itMap, _cache.begin()));*/ + } + gzclose(gzfp); + } + + CHDIR(curpath); +#endif + } + return _cache.empty(); +} + +boolean +TxCache::load(const wchar_t *path, const wchar_t *filename, int config) +{ + /* find it on disk */ + CPath cbuf(stdstr().FromUTF16(path).c_str(),stdstr().FromUTF16(filename).c_str()); + + gzFile gzfp = gzopen(cbuf, "rb"); + DBG_INFO(80, L"gzfp:%x file:%ls\n", gzfp, filename); + if (gzfp) { + /* yep, we have it. load it into memory cache. 
*/ + int dataSize; + uint64 checksum; + GHQTexInfo tmpInfo; + int tmpconfig; + /* read header to determine config match */ + gzread(gzfp, &tmpconfig, 4); + + if (tmpconfig == config) { + do { + memset(&tmpInfo, 0, sizeof(GHQTexInfo)); + + gzread(gzfp, &checksum, 8); + + gzread(gzfp, &tmpInfo.width, 4); + gzread(gzfp, &tmpInfo.height, 4); + gzread(gzfp, &tmpInfo.format, 2); + + gzread(gzfp, &tmpInfo.smallLodLog2, 4); + gzread(gzfp, &tmpInfo.largeLodLog2, 4); + gzread(gzfp, &tmpInfo.aspectRatioLog2, 4); + + gzread(gzfp, &tmpInfo.tiles, 4); + gzread(gzfp, &tmpInfo.untiled_width, 4); + gzread(gzfp, &tmpInfo.untiled_height, 4); + + gzread(gzfp, &tmpInfo.is_hires_tex, 1); + + gzread(gzfp, &dataSize, 4); + + tmpInfo.data = (uint8*)malloc(dataSize); + if (tmpInfo.data) { + gzread(gzfp, tmpInfo.data, dataSize); + + /* add to memory cache */ + add(checksum, &tmpInfo, (tmpInfo.format & GR_TEXFMT_GZ) ? dataSize : 0); + + free(tmpInfo.data); + } else { + gzseek(gzfp, dataSize, SEEK_CUR); + } + + /* skip in between to prevent the loop from being tied down to vsync */ + if (_callback && (!(_cache.size() % 100) || gzeof(gzfp))) + (*_callback)(L"[%d] total mem:%.02fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, filename); + + } while (!gzeof(gzfp)); + gzclose(gzfp); + } + } + + return !_cache.empty(); +} + +boolean +TxCache::del(uint64 checksum) +{ + if (!checksum || _cache.empty()) return 0; + + std::map::iterator itMap = _cache.find(checksum); + if (itMap != _cache.end()) { + + /* for texture cache (not hi-res cache) */ + if (!_cachelist.empty()) _cachelist.erase(((*itMap).second)->it); + + /* remove from cache */ + free((*itMap).second->info.data); + _totalSize -= (*itMap).second->size; + delete (*itMap).second; + _cache.erase(itMap); + + DBG_INFO(80, L"removed from cache: checksum = %08X %08X\n", (uint32)(checksum & 0xffffffff), (uint32)(checksum >> 32)); + + return 1; + } + + return 0; +} + +boolean +TxCache::is_cached(uint64 checksum) +{ + std::map::iterator itMap = 
_cache.find(checksum); + if (itMap != _cache.end()) return 1; + + return 0; +} + +void +TxCache::clear() +{ + if (!_cache.empty()) { + std::map::iterator itMap = _cache.begin(); + while (itMap != _cache.end()) { + free((*itMap).second->info.data); + delete (*itMap).second; + itMap++; + } + _cache.clear(); + } + + if (!_cachelist.empty()) _cachelist.clear(); + + _totalSize = 0; +} diff --git a/Source/GlideHQ/TxCache.h b/Source/GlideHQ/TxCache.h new file mode 100644 index 000000000..0b31b5443 --- /dev/null +++ b/Source/GlideHQ/TxCache.h @@ -0,0 +1,69 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __TXCACHE_H__ +#define __TXCACHE_H__ + +#include "TxInternal.h" +#include "TxUtil.h" +#include +#include +#include + +class TxCache +{ +private: + std::list _cachelist; + uint8 *_gzdest0; + uint8 *_gzdest1; + uint32 _gzdestLen; +protected: + int _options; + std::wstring _ident; + std::wstring _path; + dispInfoFuncExt _callback; + TxUtil *_txUtil; + struct TXCACHE { + int size; + GHQTexInfo info; + std::list::iterator it; + }; + int _totalSize; + int _cacheSize; + std::map _cache; + boolean save(const wchar_t *path, const wchar_t *filename, const int config); + boolean load(const wchar_t *path, const wchar_t *filename, const int config); + boolean del(uint64 checksum); /* checksum hi:palette low:texture */ + boolean is_cached(uint64 checksum); /* checksum hi:palette low:texture */ + void clear(); +public: + ~TxCache(); + TxCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident, + dispInfoFuncExt callback); + boolean add(uint64 checksum, /* checksum hi:palette low:texture */ + GHQTexInfo *info, int dataSize = 0); + boolean get(uint64 checksum, /* checksum hi:palette low:texture */ + GHQTexInfo *info); +}; + +#endif /* __TXCACHE_H__ */ diff --git a/Source/GlideHQ/TxDbg.cpp b/Source/GlideHQ/TxDbg.cpp new file mode 100644 index 000000000..f648d11f4 --- /dev/null +++ b/Source/GlideHQ/TxDbg.cpp @@ -0,0 +1,69 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DBG_LEVEL 80 + +#include "TxDbg.h" +#include +#include +#include +#include + +TxDbg::TxDbg() +{ + _level = DBG_LEVEL; + CPath Dir(CPath::MODULE_DIRECTORY,""); + Dir.AppendDirectory("Logs"); + + if (!_dbgfile) +#ifdef GHQCHK + _dbgfile = fopen(CPath(Dir,"ghqchk.txt"), "w"); +#else + _dbgfile = fopen(CPath((LPCSTR)Dir,"glidehq.dbg"), "w"); +#endif +} + +TxDbg::~TxDbg() +{ + if (_dbgfile) { + fclose(_dbgfile); + _dbgfile = 0; + } + + _level = DBG_LEVEL; +} + +void +TxDbg::output(const int level, const wchar_t *format, ...) +{ + if (level > _level) + return; + + stdstr_f newformat("%d:\t%s",level,stdstr().FromUTF16(format).c_str()); + + va_list args; + va_start(args, format); + vfwprintf(_dbgfile, newformat.ToUTF16().c_str(), args); + fflush(_dbgfile); + va_end(args); +} diff --git a/Source/GlideHQ/TxDbg.h b/Source/GlideHQ/TxDbg.h new file mode 100644 index 000000000..f22b87e87 --- /dev/null +++ b/Source/GlideHQ/TxDbg.h @@ -0,0 +1,61 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __TXDBG_H__ +#define __TXDBG_H__ + +#include +#include "TxInternal.h" + +class TxDbg +{ +private: + FILE* _dbgfile; + int _level; + TxDbg(); +public: + static TxDbg* getInstance() { + static TxDbg txDbg; + return &txDbg; + } + ~TxDbg(); + void output(const int level, const wchar_t *format, ...); +}; + +#ifdef DEBUG +#define DBG_INFO TxDbg::getInstance()->output +#define INFO DBG_INFO +#else +#define DBG_INFO 0 && (wchar_t) +#ifdef GHQCHK +#define INFO TxDbg::getInstance()->output +#else +#if 0 /* XXX enable this to log basic hires texture checks */ +#define INFO TxDbg::getInstance()->output +#else +#define INFO DBG_INFO +#endif +#endif +#endif + +#endif /* __TXDBG_H__ */ diff --git a/Source/GlideHQ/TxFilter.cpp b/Source/GlideHQ/TxFilter.cpp new file mode 100644 index 000000000..39b0a3296 --- /dev/null +++ b/Source/GlideHQ/TxFilter.cpp @@ -0,0 +1,692 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifdef WIN32 +#pragma warning(disable: 4786) +#endif + +#include +#include "TxFilter.h" +#include "TextureFilters.h" +#include "TxDbg.h" +#include "bldno.h" + +void TxFilter::clear() +{ + /* clear hires texture cache */ + delete _txHiResCache; + + /* clear texture cache */ + delete _txTexCache; + + /* free memory */ + TxMemBuf::getInstance()->shutdown(); + + /* clear other stuff */ + delete _txImage; + delete _txQuantize; + delete _txUtil; +} + +TxFilter::~TxFilter() +{ + clear(); +} + +TxFilter::TxFilter(int maxwidth, int maxheight, int maxbpp, int options, + int cachesize, wchar_t *path, wchar_t *ident, + dispInfoFuncExt callback) : + _numcore(0), + _tex1(NULL), + _tex2(NULL), + _maxwidth(0), + _maxheight(0), + _maxbpp(0), + _options(0), + _cacheSize(0), + _txQuantize(NULL), + _txTexCache(NULL), + _txHiResCache(NULL), + _txUtil(NULL), + _txImage(NULL), + _initialized(false) +{ + /* HACKALERT: the emulator misbehaves and sometimes forgets to shutdown */ + if ((ident && wcscmp(ident, L"DEFAULT") != 0 && _ident.compare(ident) == 0) && + _maxwidth == maxwidth && + _maxheight == maxheight && + _maxbpp == maxbpp && + _options == options && + _cacheSize == cachesize) return; + clear(); /* gcc does not allow the destructor to be called */ + + /* shamelessness :P this first call to the debug output message creates + * a file in the executable directory. 
*/ + INFO(0, L"------------------------------------------------------------------\n"); +#ifdef GHQCHK + INFO(0, L" GlideHQ Hires Texture Checker 1.02.00.%d\n", BUILD_NUMBER); +#else +// INFO(0, L" GlideHQ version 1.02.00.%d\n", BUILD_NUMBER); +#endif + INFO(0, L" Copyright (C) 2010 Hiroshi Morii All Rights Reserved\n"); + INFO(0, L" email : koolsmoky(at)users.sourceforge.net\n"); + INFO(0, L" website : http://www.3dfxzone.it/koolsmoky\n"); + INFO(0, L"\n"); + INFO(0, L" Glide64 official website : http://glide64.emuxhaven.net\n"); + INFO(0, L"------------------------------------------------------------------\n"); + + _options = options; + + _txImage = new TxImage(); + _txQuantize = new TxQuantize(); + _txUtil = new TxUtil(); + + /* get number of CPU cores. */ + _numcore = _txUtil->getNumberofProcessors(); + + _initialized = 0; + + _tex1 = NULL; + _tex2 = NULL; + + /* XXX: anything larger than 1024 * 1024 is overkill */ + _maxwidth = maxwidth > 1024 ? 1024 : maxwidth; + _maxheight = maxheight > 1024 ? 1024 : maxheight; + _maxbpp = maxbpp; + + _cacheSize = cachesize; + + /* TODO: validate options and do overrides here*/ + + /* save path name */ + if (path) + _path.assign(path); + + /* save ROM name */ + if (ident && wcscmp(ident, L"DEFAULT") != 0) + _ident.assign(ident); + + /* check for dxtn extensions */ + if (!TxLoadLib::getInstance()->getdxtCompressTexFuncExt()) + _options &= ~S3TC_COMPRESSION; + + if (!TxLoadLib::getInstance()->getfxtCompressTexFuncExt()) + _options &= ~FXT1_COMPRESSION; + + switch (options & COMPRESSION_MASK) { + case FXT1_COMPRESSION: + case S3TC_COMPRESSION: + break; + case NCC_COMPRESSION: + default: + _options &= ~COMPRESSION_MASK; + } + + if (TxMemBuf::getInstance()->init(_maxwidth, _maxheight)) { + if (!_tex1) + _tex1 = TxMemBuf::getInstance()->get(0); + + if (!_tex2) + _tex2 = TxMemBuf::getInstance()->get(1); + } + +#if !_16BPP_HACK + /* initialize hq4x filter */ + hq4x_init(); +#endif + + /* initialize texture cache in bytes. 
128Mb will do nicely in most cases */ + _txTexCache = new TxTexCache(_options, _cacheSize, _path.c_str(), _ident.c_str(), callback); + + /* hires texture */ +#if HIRES_TEXTURE + _txHiResCache = new TxHiResCache(_maxwidth, _maxheight, _maxbpp, _options, _path.c_str(), _ident.c_str(), callback); + + if (_txHiResCache->empty()) + _options &= ~HIRESTEXTURES_MASK; +#endif + + if (!(_options & COMPRESS_TEX)) + _options &= ~COMPRESSION_MASK; + + if (_tex1 && _tex2) + _initialized = 1; +} + +boolean +TxFilter::filter(uint8 *src, int srcwidth, int srcheight, uint16 srcformat, uint64 g64crc, GHQTexInfo *info) +{ + uint8 *texture = src; + uint8 *tmptex = _tex1; + uint16 destformat = srcformat; + + /* We need to be initialized first! */ + if (!_initialized) return 0; + + /* find cached textures */ + if (_cacheSize) { + + /* calculate checksum of source texture */ + if (!g64crc) + g64crc = (uint64)(_txUtil->checksumTx(texture, srcwidth, srcheight, srcformat)); + + DBG_INFO(80, L"filter: crc:%08X %08X %d x %d gfmt:%x\n", + (uint32)(g64crc >> 32), (uint32)(g64crc & 0xffffffff), srcwidth, srcheight, srcformat); + +#if 0 /* use hirestex to retrieve cached textures. */ + /* check if we have it in cache */ + if (!(g64crc & 0xffffffff00000000) && /* we reach here only when there is no hires texture for this crc */ + _txTexCache->get(g64crc, info)) { + DBG_INFO(80, L"cache hit: %d x %d gfmt:%x\n", info->width, info->height, info->format); + return 1; /* yep, we've got it */ + } +#endif + } + + /* Leave small textures alone because filtering makes little difference. + * Moreover, some filters require at least 4 * 4 to work. + * Bypass _options to do ARGB8888->16bpp if _maxbpp=16 or forced color reduction. 
+ */ + if ((srcwidth >= 4 && srcheight >= 4) && + ((_options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESSION_MASK)) || + (srcformat == GR_TEXFMT_ARGB_8888 && (_maxbpp < 32 || _options & FORCE16BPP_TEX)))) { + +#if !_16BPP_HACK + /* convert textures to a format that the compressor accepts (ARGB8888) */ + if (_options & COMPRESSION_MASK) { +#endif + if (srcformat != GR_TEXFMT_ARGB_8888) { + if (!_txQuantize->quantize(texture, tmptex, srcwidth, srcheight, srcformat, GR_TEXFMT_ARGB_8888)) { + DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", srcformat); + return 0; + } + texture = tmptex; + destformat = GR_TEXFMT_ARGB_8888; + } +#if !_16BPP_HACK + } +#endif + + switch (destformat) { + case GR_TEXFMT_ARGB_8888: + + /* + * prepare texture enhancements (x2, x4 scalers) + */ + int scale_shift = 0, num_filters = 0; + uint32 filter = 0; + + if ((_options & ENHANCEMENT_MASK) == HQ4X_ENHANCEMENT) { + if (srcwidth <= (_maxwidth >> 2) && srcheight <= (_maxheight >> 2)) { + filter |= HQ4X_ENHANCEMENT; + scale_shift = 2; + num_filters++; + } else if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + filter |= HQ2X_ENHANCEMENT; + scale_shift = 1; + num_filters++; + } + } else if (_options & ENHANCEMENT_MASK) { + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + filter |= (_options & ENHANCEMENT_MASK); + scale_shift = 1; + num_filters++; + } + } + + /* + * prepare texture filters + */ + if (_options & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) { + filter |= (_options & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)); + num_filters++; + } + + /* + * execute texture enhancements and filters + */ + while (num_filters > 0) { + + tmptex = (texture == _tex1) ? 
_tex2 : _tex1; + + uint8 *_texture = texture; + uint8 *_tmptex = tmptex; + + unsigned int numcore = _numcore; + unsigned int blkrow = 0; + while (numcore > 1 && blkrow == 0) { + blkrow = (srcheight >> 2) / numcore; + numcore--; + } + if (blkrow > 0 && numcore > 1) { +#ifdef tofix + boost::thread *thrd[MAX_NUMCORE]; + unsigned int i; + int blkheight = blkrow << 2; + unsigned int srcStride = (srcwidth * blkheight) << 2; + unsigned int destStride = srcStride << scale_shift << scale_shift; + for (i = 0; i < numcore - 1; i++) { + thrd[i] = new boost::thread(boost::bind(filter_8888, + (uint32*)_texture, + srcwidth, + blkheight, + (uint32*)_tmptex, + filter)); + _texture += srcStride; + _tmptex += destStride; + } + thrd[i] = new boost::thread(boost::bind(filter_8888, + (uint32*)_texture, + srcwidth, + srcheight - blkheight * i, + (uint32*)_tmptex, + filter)); + for (i = 0; i < numcore; i++) { + thrd[i]->join(); + delete thrd[i]; + } +#endif + } else { + filter_8888((uint32*)_texture, srcwidth, srcheight, (uint32*)_tmptex, filter); + } + + if (filter & ENHANCEMENT_MASK) { + srcwidth <<= scale_shift; + srcheight <<= scale_shift; + filter &= ~ENHANCEMENT_MASK; + scale_shift = 0; + } + + texture = tmptex; + num_filters--; + } + + /* + * texture compression + */ + /* ignored if we only have texture compression option on. + * only done when texture enhancer is used. see constructor. */ + if ((_options & COMPRESSION_MASK) && + (srcwidth >= 64 && srcheight >= 64) /* Texture compression is not suitable for low pixel coarse detail + * textures. The assumption here is that textures larger than 64x64 + * have enough detail to produce decent quality when compressed. The + * down side is that narrow stripped textures that the N64 often use + * for large background textures are also ignored. It would be more + * reasonable if decisions are made based on fourier-transform + * spectrum or RMS error. 
+ */ + ) { + int compressionType = _options & COMPRESSION_MASK; + int tmpwidth, tmpheight; + uint16 tmpformat; + /* XXX: textures that use 8bit alpha channel look bad with the current + * fxt1 library, so we substitute it with dxtn for now. afaik all gfx + * cards that support fxt1 also support dxtn. (3dfx and Intel) */ + if ((destformat == GR_TEXFMT_ALPHA_INTENSITY_88) || + (destformat == GR_TEXFMT_ARGB_8888) || + (destformat == GR_TEXFMT_ALPHA_8)) { + compressionType = S3TC_COMPRESSION; + } + tmptex = (texture == _tex1) ? _tex2 : _tex1; + if (_txQuantize->compress(texture, tmptex, + srcwidth, srcheight, srcformat, + &tmpwidth, &tmpheight, &tmpformat, + compressionType)) { + srcwidth = tmpwidth; + srcheight = tmpheight; + destformat = tmpformat; + texture = tmptex; + } + } + + + /* + * texture (re)conversions + */ + if (destformat == GR_TEXFMT_ARGB_8888) { + if (srcformat == GR_TEXFMT_ARGB_8888 && (_maxbpp < 32 || _options & FORCE16BPP_TEX)) srcformat = GR_TEXFMT_ARGB_4444; + if (srcformat != GR_TEXFMT_ARGB_8888) { + tmptex = (texture == _tex1) ? _tex2 : _tex1; + if (!_txQuantize->quantize(texture, tmptex, srcwidth, srcheight, GR_TEXFMT_ARGB_8888, srcformat)) { + DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", srcformat); + return 0; + } + texture = tmptex; + destformat = srcformat; + } + } + + break; +#if !_16BPP_HACK + case GR_TEXFMT_ARGB_4444: + + int scale_shift = 0; + tmptex = (texture == _tex1) ? 
_tex2 : _tex1; + + switch (_options & ENHANCEMENT_MASK) { + case HQ4X_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 2) && srcheight <= (_maxheight >> 2)) { + hq4x_4444((uint8*)texture, (uint8*)tmptex, srcwidth, srcheight, srcwidth, srcwidth * 4 * 2); + scale_shift = 2; + }/* else if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + hq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight); + scale_shift = 1; + }*/ + break; + case HQ2X_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + hq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight); + scale_shift = 1; + } + break; + case HQ2XS_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + hq2xS_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight); + scale_shift = 1; + } + break; + case LQ2X_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + lq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight); + scale_shift = 1; + } + break; + case LQ2XS_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + lq2xS_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight); + scale_shift = 1; + } + break; + case X2SAI_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + Super2xSaI_4444((uint16*)texture, (uint16*)tmptex, srcwidth, srcheight, srcwidth); + scale_shift = 1; + } + break; + case X2_ENHANCEMENT: + if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) { + Texture2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight); + scale_shift = 1; + } + } + if (scale_shift) { + srcwidth <<= scale_shift; + srcheight <<= scale_shift; + texture = tmptex; + } + + if (_options & SMOOTH_FILTER_MASK) { + tmptex = (texture == 
_tex1) ? _tex2 : _tex1; + SmoothFilter_4444((uint16*)texture, srcwidth, srcheight, (uint16*)tmptex, (_options & SMOOTH_FILTER_MASK)); + texture = tmptex; + } else if (_options & SHARP_FILTER_MASK) { + tmptex = (texture == _tex1) ? _tex2 : _tex1; + SharpFilter_4444((uint16*)texture, srcwidth, srcheight, (uint16*)tmptex, (_options & SHARP_FILTER_MASK)); + texture = tmptex; + } + + break; + case GR_TEXFMT_ARGB_1555: + break; + case GR_TEXFMT_RGB_565: + break; + case GR_TEXFMT_ALPHA_8: + break; +#endif /* _16BPP_HACK */ + } + } + + /* fill in the texture info. */ + info->data = texture; + info->width = srcwidth; + info->height = srcheight; + info->format = destformat; + info->smallLodLog2 = _txUtil->grLodLog2(srcwidth, srcheight); + info->largeLodLog2 = info->smallLodLog2; + info->aspectRatioLog2 = _txUtil->grAspectRatioLog2(srcwidth, srcheight); + info->is_hires_tex = 0; + + /* cache the texture. */ + if (_cacheSize) _txTexCache->add(g64crc, info); + + DBG_INFO(80, L"filtered texture: %d x %d gfmt:%x\n", info->width, info->height, info->format); + + return 1; +} + +boolean +TxFilter::hirestex(uint64 g64crc, uint64 r_crc64, uint16 *palette, GHQTexInfo *info) +{ + /* NOTE: Rice CRC32 sometimes return the same value for different textures. + * As a workaround, Glide64 CRC32 is used for the key for NON-hires + * texture cache. + * + * r_crc64 = hi:palette low:texture + * (separate crc. doesn't necessary have to be rice crc) + * g64crc = texture + palette glide64 crc32 + * (can be any other crc if robust) + */ + + DBG_INFO(80, L"hirestex: r_crc64:%08X %08X, g64crc:%08X %08X\n", + (uint32)(r_crc64 >> 32), (uint32)(r_crc64 & 0xffffffff), + (uint32)(g64crc >> 32), (uint32)(g64crc & 0xffffffff)); + +#if HIRES_TEXTURE + /* check if we have it in hires memory cache. 
*/ + if ((_options & HIRESTEXTURES_MASK) && r_crc64) { + if (_txHiResCache->get(r_crc64, info)) { + DBG_INFO(80, L"hires hit: %d x %d gfmt:%x\n", info->width, info->height, info->format); + + /* TODO: Enable emulation for special N64 combiner modes. There are few ways + * to get this done. Also applies for CI textures below. + * + * Solution 1. Load the hiresolution textures in ARGB8888 (or A8, IA88) format + * to cache. When a cache is hit, then we take the modes passed in from Glide64 + * (also TODO) and apply the modification. Then we do color reduction or format + * conversion or compression if desired and stuff it into the non-hires texture + * cache. + * + * Solution 2. When a cache is hit and if the combiner modes are present, + * convert the texture to ARGB4444 and pass it back to Glide64 to process. + * If a texture is compressed, it needs to be decompressed first. Then add + * the processed texture to the non-hires texture cache. + * + * Solution 3. Hybrid of the above 2. Load the textures in ARGB8888 (A8, IA88) + * format. Convert the texture to ARGB4444 and pass it back to Glide64 when + * the combiner modes are present. Get the processed texture back from Glide64 + * and compress if desired and add it to the non-hires texture cache. + * + * Solution 4. Take the easy way out and forget about this whole thing. + */ + + return 1; /* yep, got it */ + } + if (_txHiResCache->get((r_crc64 & 0xffffffff), info)) { + DBG_INFO(80, L"hires hit: %d x %d gfmt:%x\n", info->width, info->height, info->format); + + /* for true CI textures, we use the passed in palette to convert to + * ARGB1555 and add it to memory cache. + * + * NOTE: we do this AFTER all other texture cache searches because + * only a few texture packs actually use true CI textures. + * + * NOTE: the pre-converted palette from Glide64 is in RGBA5551 format. + * A comp comes before RGB comp. + */ + if (palette && info->format == GR_TEXFMT_P_8) { + DBG_INFO(80, L"found GR_TEXFMT_P_8 format. 
Need conversion!!\n"); + + int width = info->width; + int height = info->height; + uint16 format = info->format; + /* XXX: avoid collision with zlib compression buffer in TxHiResTexture::get */ + uint8 *texture = info->data; + uint8 *tmptex = (texture == _tex1) ? _tex2 : _tex1; + + /* use palette and convert to 16bit format */ + _txQuantize->P8_16BPP((uint32*)texture, (uint32*)tmptex, info->width, info->height, (uint32*)palette); + texture = tmptex; + format = GR_TEXFMT_ARGB_1555; + +#if 1 + /* XXX: compressed if memory cache compression is ON */ + if (_options & COMPRESSION_MASK) { + tmptex = (texture == _tex1) ? _tex2 : _tex1; + if (_txQuantize->quantize(texture, tmptex, info->width, info->height, format, GR_TEXFMT_ARGB_8888)) { + texture = tmptex; + format = GR_TEXFMT_ARGB_8888; + } + if (format == GR_TEXFMT_ARGB_8888) { + tmptex = (texture == _tex1) ? _tex2 : _tex1; + if (_txQuantize->compress(texture, tmptex, + info->width, info->height, GR_TEXFMT_ARGB_1555, + &width, &height, &format, + _options & COMPRESSION_MASK)) { + texture = tmptex; + } else { + /*if (!_txQuantize->quantize(texture, tmptex, info->width, info->height, GR_TEXFMT_ARGB_8888, GR_TEXFMT_ARGB_1555)) { + DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", format); + return 0; + }*/ + texture = tmptex; + format = GR_TEXFMT_ARGB_1555; + } + } + } +#endif + + /* fill in the required info to return */ + info->data = texture; + info->width = width; + info->height = height; + info->format = format; + info->smallLodLog2 = _txUtil->grLodLog2(width, height); + info->largeLodLog2 = info->smallLodLog2; + info->aspectRatioLog2 = _txUtil->grAspectRatioLog2(width, height); + info->is_hires_tex = 1; + + /* XXX: add to hires texture cache!!! 
*/ + _txHiResCache->add(r_crc64, info); + + DBG_INFO(80, L"GR_TEXFMT_P_8 loaded as gfmt:%x!\n", format); + } + + return 1; + } + } +#endif + + /* check if we have it in memory cache */ + if (_cacheSize && g64crc) { + if (_txTexCache->get(g64crc, info)) { + DBG_INFO(80, L"cache hit: %d x %d gfmt:%x\n", info->width, info->height, info->format); + return 1; /* yep, we've got it */ + } + } + + DBG_INFO(80, L"no cache hits.\n"); + + return 0; +} + +uint64 +TxFilter::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +{ + if (_options & (HIRESTEXTURES_MASK|DUMP_TEX)) + return _txUtil->checksum64(src, width, height, size, rowStride, palette); + + return 0; +} + +boolean +TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64) +{ + if (!_initialized) + return 0; + + if (!(_options & DUMP_TEX)) + return 0; + + DBG_INFO(80, L"gfmt = %02x n64fmt = %02x\n", gfmt, n64fmt); + DBG_INFO(80, L"hirestex: r_crc64:%08X %08X\n", + (uint32)(r_crc64 >> 32), (uint32)(r_crc64 & 0xffffffff)); + + if (!_txQuantize->quantize(src, _tex1, rowStridePixel, height, (gfmt & 0x00ff), GR_TEXFMT_ARGB_8888)) + return 0; + + src = _tex1; + + if (!_path.empty() && !_ident.empty()) { + /* dump it to disk */ + FILE *fp = NULL; + CPath tmpbuf(stdstr().FromUTF16(_path.c_str()).c_str(),""); + + /* create directories */ + tmpbuf.AppendDirectory("texture_dump"); + + if (!tmpbuf.DirectoryExists() && !tmpbuf.CreateDirectory()) + return 0; + + tmpbuf.AppendDirectory(stdstr().FromUTF16(_ident.c_str()).c_str()); + if (!tmpbuf.DirectoryExists() && !tmpbuf.CreateDirectory()) + return 0; + + tmpbuf.AppendDirectory("GlideHQ"); + if (!tmpbuf.DirectoryExists() && !tmpbuf.CreateDirectory()) + return 0; + + if ((n64fmt >> 8) == 0x2) { + tmpbuf.SetNameExtension(stdstr_f("%ls#%08X#%01X#%01X#%08X_ciByRGBA.png",_ident.c_str(),(uint32)(r_crc64 & 0xffffffff), (n64fmt >> 8),(n64fmt & 0xf),(uint32)(r_crc64 >> 32)).c_str()); + } else { + 
tmpbuf.SetNameExtension(stdstr_f("%ls#%08X#%01X#%01X_all.png",_ident.c_str(),(uint32)(r_crc64 & 0xffffffff),(n64fmt >> 8),(n64fmt & 0xf)).c_str()); + } +#ifdef WIN32 + if ((fp = fopen(tmpbuf, "wb")) != NULL) { +#else + char cbuf[MAX_PATH]; + wcstombs(cbuf, tmpbuf.c_str(), MAX_PATH); + if ((fp = fopen(cbuf, "wb")) != NULL) { +#endif + _txImage->writePNG(src, fp, width, height, (rowStridePixel << 2), 0x0003, 0); + fclose(fp); + return 1; + } + } + + return 0; +} + +boolean +TxFilter::reloadhirestex() +{ + DBG_INFO(80, L"Reload hires textures from texture pack.\n"); + + if (_txHiResCache->load(0)) { + if (_txHiResCache->empty()) _options &= ~HIRESTEXTURES_MASK; + else _options |= HIRESTEXTURES_MASK; + + return 1; + } + + return 0; +} diff --git a/Source/GlideHQ/TxFilter.h b/Source/GlideHQ/TxFilter.h new file mode 100644 index 000000000..fdbd0268c --- /dev/null +++ b/Source/GlideHQ/TxFilter.h @@ -0,0 +1,81 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __TXFILTER_H__ +#define __TXFILTER_H__ + +#include "TxInternal.h" +#include "TxQuantize.h" +#include "TxHiResCache.h" +#include "TxTexCache.h" +#include "TxUtil.h" +#include "TxImage.h" +#include +/* TxFilter: owns the option flags, the _tex1/_tex2 buffers, the texture cache, the hires texture cache and the quantize/util/image helper objects that the txfilter_* entry points forward to. */ +class TxFilter +{ +private: + int _numcore; + /* _tex1 is the buffer dmptx() converts into before writing the PNG */ + uint8 *_tex1; + uint8 *_tex2; + int _maxwidth; + int _maxheight; + int _maxbpp; + int _options; /* option bit flags, tested against masks such as DUMP_TEX and HIRESTEXTURES_MASK */ + int _cacheSize; /* the _txTexCache lookup in TxFilter.cpp is only attempted when this is non-zero */ + std::wstring _ident; + std::wstring _path; + TxQuantize *_txQuantize; + TxTexCache *_txTexCache; + TxHiResCache *_txHiResCache; + TxUtil *_txUtil; + TxImage *_txImage; + boolean _initialized; /* dmptx() returns 0 while this is zero */ + void clear(); +public: + ~TxFilter(); + TxFilter(int maxwidth, + int maxheight, + int maxbpp, + int options, + int cachesize, + wchar_t *path, + wchar_t *ident, + dispInfoFuncExt callback); + boolean filter(uint8 *src, + int srcwidth, + int srcheight, + uint16 srcformat, + uint64 g64crc, /* glide64 crc, 64bit for future use */ + GHQTexInfo *info); + boolean hirestex(uint64 g64crc, /* glide64 crc, 64bit for future use */ + uint64 r_crc64, /* checksum hi:palette low:texture */ + uint16 *palette, + GHQTexInfo *info); + uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); /* returns 0 unless hires textures or texture dumping are enabled in _options */ + boolean dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64); /* dumps the texture as a PNG; returns 1 once the file has been written out, 0 otherwise */ + boolean reloadhirestex(); /* reloads the hires texture pack and updates the HIRESTEXTURES_MASK bits of _options */ +}; + +#endif /* __TXFILTER_H__ */ diff --git a/Source/GlideHQ/TxFilterExport.cpp b/Source/GlideHQ/TxFilterExport.cpp new file mode 100644 index 000000000..7447c3c95 --- /dev/null +++ b/Source/GlideHQ/TxFilterExport.cpp @@ -0,0 +1,105 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version.
+ * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef WIN32 +#pragma warning(disable: 4786) +#endif + +#include "TxFilter.h" +/* the single filter instance behind every txfilter_* entry point; NULL until txfilter_init() creates it and again after txfilter_shutdown() */ +TxFilter *txFilter = NULL; + +#ifdef __cplusplus +extern "C"{ +#endif +/* txfilter_init: creates the global TxFilter from the given limits, options, cache size, path, ident and callback. Returns 0 if an instance already exists, otherwise 1 once the instance has been created. NOTE(review): the result is based only on the pointer, not on the TxFilter _initialized flag -- confirm callers do not need that. */ +TAPI boolean TAPIENTRY +txfilter_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize, + wchar_t *path, wchar_t*ident, + dispInfoFuncExt callback) +{ + if (txFilter) return 0; + + txFilter = new TxFilter(maxwidth, maxheight, maxbpp, options, cachesize, + path, ident, callback); + + return (txFilter ? 1 : 0); +} +/* txfilter_shutdown: deletes the global TxFilter, if any, and resets the pointer to NULL so that txfilter_init() can succeed again. NOTE(review): unlike the other entry points this one is declared without TAPI/TAPIENTRY -- confirm that is intended. */ +void txfilter_shutdown(void) +{ + if (txFilter) delete txFilter; + + txFilter = NULL; +} +/* txfilter: forwards to TxFilter::filter(); returns 0 when no filter instance exists. */ +TAPI boolean TAPIENTRY +txfilter(uint8 *src, int srcwidth, int srcheight, uint16 srcformat, + uint64 g64crc, GHQTexInfo *info) +{ + if (txFilter) + return txFilter->filter(src, srcwidth, srcheight, srcformat, + g64crc, info); + + return 0; +} +/* txfilter_hirestex: forwards to TxFilter::hirestex(); returns 0 when no filter instance exists. */ +TAPI boolean TAPIENTRY +txfilter_hirestex(uint64 g64crc, uint64 r_crc64, uint16 *palette, GHQTexInfo *info) +{ + if (txFilter) + return txFilter->hirestex(g64crc, r_crc64, palette, info); + + return 0; +} +/* txfilter_checksum: forwards to TxFilter::checksum64(); returns 0 when no filter instance exists. */ +TAPI uint64 TAPIENTRY +txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +{ + if (txFilter) + return txFilter->checksum64(src, width, height, size, rowStride, palette); + + return 0; +} +/* txfilter_dmptx: forwards to TxFilter::dmptx(); returns 0 when no filter instance exists. */ +TAPI boolean TAPIENTRY +txfilter_dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64) +{ + if (txFilter) + return txFilter->dmptx(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64); + + return 0; +} + 
+/* txfilter_reloadhirestex: forwards to TxFilter::reloadhirestex(); returns 0 when no filter instance exists. */ TAPI boolean TAPIENTRY +txfilter_reloadhirestex() +{ + if (txFilter) + return txFilter->reloadhirestex(); + + return 0; +} + +#ifdef __cplusplus +} +#endif diff --git a/Source/GlideHQ/TxHiResCache.cpp b/Source/GlideHQ/TxHiResCache.cpp new file mode 100644 index 000000000..d1c2afdac --- /dev/null +++ b/Source/GlideHQ/TxHiResCache.cpp @@ -0,0 +1,1084 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* 2007 Gonetz + * Added callback to display hires texture info. */ + +#ifdef WIN32 +#pragma warning(disable: 4786) +#endif + +/* dump processed hirestextures to disk + * (0:disable, 1:enable) */ +#define DUMP_CACHE 1 + +/* handle oversized textures by + * 0: minification + * 1: Glide64 style tiling + */ +#define TEXTURE_TILING 1 + +/* use power of 2 texture size + * (0:disable, 1:enable, 2:3dfx) */ +#define POW2_TEXTURES 2 +/* whenever tiling is enabled POW2_TEXTURES is forced to 2 (3dfx), overriding the value chosen above */ +#if TEXTURE_TILING +#undef POW2_TEXTURES +#define POW2_TEXTURES 2 +#endif + +/* hack to reduce texture footprint to achieve + * better performance on midrange gfx cards.
+ * (0:disable, 1:enable) */ +#define REDUCE_TEXTURE_FOOTPRINT 0 + +/* use aggressive format assumption for quantization + * (0:disable, 1:enable, 2:extreme) */ +#define AGGRESSIVE_QUANTIZATION 1 + +#include "TxHiResCache.h" +#include "TxDbg.h" +#include +#include +#include +/* Destructor: when DUMP_CACHE is compiled in, DUMP_HIRESTEXCACHE is set, no cache file was loaded by the constructor and loading was not aborted, saves the cache as IDENT_HIRESTEXTURES.dat via TxCache::save() using the "cache" directory appended to _path; then deletes the image, quantize and resample helpers. */ +TxHiResCache::~TxHiResCache() +{ +#if DUMP_CACHE + if ((_options & DUMP_HIRESTEXCACHE) && !_haveCache && !_abortLoad) { + /* dump cache to disk */ + std::wstring filename = _ident + L"_HIRESTEXTURES.dat"; + + CPath cachepath(stdstr().FromUTF16(_path.c_str()).c_str(),""); + cachepath.AppendDirectory("cache"); + int config = _options & (HIRESTEXTURES_MASK|COMPRESS_HIRESTEX|COMPRESSION_MASK|TILE_HIRESTEX|FORCE16BPP_HIRESTEX|GZ_HIRESTEXCACHE|LET_TEXARTISTS_FLY); + + TxCache::save(stdstr(cachepath).ToUTF16().c_str(), filename.c_str(), config); + } +#endif + + delete _txImage; + delete _txQuantize; + delete _txReSample; +} +/* Constructor: passes the options to TxCache with GZ_TEXCACHE removed, creates the helper objects and stores the size/bpp limits. With an empty path or ident it only clears DUMP_HIRESTEXCACHE and returns. Otherwise it tries to load a previously dumped cache file (when DUMP_CACHE and DUMP_HIRESTEXCACHE allow it) and, if none was loaded, reads the hires textures through load(0). */ +TxHiResCache::TxHiResCache(int maxwidth, int maxheight, int maxbpp, int options, + const wchar_t *path, const wchar_t *ident, + dispInfoFuncExt callback + ) : TxCache((options & ~GZ_TEXCACHE), 0, path, ident, callback) +{ + _txImage = new TxImage(); + _txQuantize = new TxQuantize(); + _txReSample = new TxReSample(); + + _maxwidth = maxwidth; + _maxheight = maxheight; + _maxbpp = maxbpp; + _abortLoad = 0; + _haveCache = 0; + + /* assert local options */ + if (!(_options & COMPRESS_HIRESTEX)) + _options &= ~COMPRESSION_MASK; + + if (_path.empty() || _ident.empty()) { + _options &= ~DUMP_HIRESTEXCACHE; + return; + } + +#if DUMP_CACHE + /* read in hires texture cache */ + if (_options & DUMP_HIRESTEXCACHE) { + /* find it on disk */ + std::wstring filename = _ident + L"_HIRESTEXTURES.dat"; + CPath cachepath(stdstr().FromUTF16(_path.c_str()).c_str(),""); + cachepath.AppendDirectory("cache"); + int config = _options & (HIRESTEXTURES_MASK|COMPRESS_HIRESTEX|COMPRESSION_MASK|TILE_HIRESTEX|FORCE16BPP_HIRESTEX|GZ_HIRESTEXCACHE|LET_TEXARTISTS_FLY); + /* same option mask as the destructor passes to TxCache::save() */ + _haveCache = 
TxCache::load(stdstr(cachepath).ToUTF16().c_str(), filename.c_str(), config); + } +#endif + + /* read in hires textures */ + if (!_haveCache) TxHiResCache::load(0); +} +/* empty: returns the result of _cache.empty(), i.e. nonzero when the cache holds no entries. */ +boolean +TxHiResCache::empty() +{ + return _cache.empty(); +} +/* load: (re)reads the hires texture pack. With replace == 0 the cache is cleared first. Only the RICE_HIRESTEXTURES case loads anything, from the directories hires_texture and IDENT appended to _path; the GHQ and JABO cases do nothing. Returns 1 whenever _path and _ident are both non-empty, regardless of what loadHiResTextures() returned, and 0 otherwise. */ +boolean +TxHiResCache::load(boolean replace) /* 0 : reload, 1 : replace partial */ +{ + if (!_path.empty() && !_ident.empty()) { + + if (!replace) TxCache::clear(); + + CPath dir_path(stdstr().FromUTF16(_path.c_str()).c_str(),""); + + switch (_options & HIRESTEXTURES_MASK) { + case GHQ_HIRESTEXTURES: + break; + case RICE_HIRESTEXTURES: + INFO(80, L"-----\n"); + INFO(80, L"using Rice hires texture format...\n"); + INFO(80, L" must be one of the following;\n"); + INFO(80, L" 1) *_rgb.png + *_a.png\n"); + INFO(80, L" 2) *_all.png\n"); + INFO(80, L" 3) *_ciByRGBA.png\n"); + INFO(80, L" 4) *_allciByRGBA.png\n"); + INFO(80, L" 5) *_ci.bmp\n"); + INFO(80, L" usage of only 2) and 3) highly recommended!\n"); + INFO(80, L" folder names must be in US-ASCII characters!\n"); + + dir_path.AppendDirectory("hires_texture"); + dir_path.AppendDirectory(stdstr().FromUTF16(_ident.c_str()).c_str()); + loadHiResTextures(dir_path, replace); + break; + case JABO_HIRESTEXTURES: + ; + } + + return 1; + } + + return 0; +} + +boolean +TxHiResCache::loadHiResTextures(LPCSTR dir_path, boolean replace) +{ + DBG_INFO(80, L"-----\n"); + DBG_INFO(80, L"path: %s\n", dir_path); + +// _asm int 3 +#ifdef tofix + /* find it on disk */ + if (!boost::filesystem::exists(dir_path)) { + INFO(80, L"Error: path not found!\n"); + return 0; + } + + /* XXX: deal with UNICODE fiasco! + * stupidity flows forth beneath this... + * + * I opted to use chdir in order to use fopen() for windows 9x.
+ */ +#ifdef WIN32 + wchar_t curpath[MAX_PATH]; + GETCWD(MAX_PATH, curpath); + CHDIR(dir_path.string().c_str()); +#else + char curpath[MAX_PATH]; + char cbuf[MAX_PATH]; + wcstombs(cbuf, dir_path.string().c_str(), MAX_PATH); + GETCWD(MAX_PATH, curpath); + CHDIR(cbuf); +#endif + + /* NOTE: I could use the boost::wdirectory_iterator and boost::wpath + * to resolve UNICODE file names and paths. But then, _wfopen() is + * required to get the file descriptor for MS Windows to pass into + * libpng, which is incompatible with Win9x. Win9x's fopen() cannot + * handle UNICODE names. UNICODE capable boost::filesystem is available + * with Boost1.34.1 built with VC8.0 (bjam --toolset=msvc-8.0 stage). + * + * RULE OF THUMB: NEVER save texture packs in NON-ASCII names!! + */ + boost::filesystem::wdirectory_iterator it(dir_path); + boost::filesystem::wdirectory_iterator end_it; /* default construction yields past-the-end */ + + for (; it != end_it; ++it) { + + if (KBHIT(0x1B)) { + _abortLoad = 1; + if (_callback) (*_callback)(L"Aborted loading hiresolution texture!\n"); + INFO(80, L"Error: aborted loading hiresolution texture!\n"); + } + if (_abortLoad) break; + + /* recursive read into sub-directory */ + if (boost::filesystem::is_directory(it->status())) { + loadHiResTextures(it->path(), replace); + continue; + } + + DBG_INFO(80, L"-----\n"); + DBG_INFO(80, L"file: %ls\n", it->path().leaf().c_str()); + + int width = 0, height = 0; + uint16 format = 0; + uint8 *tex = NULL; + int tmpwidth = 0, tmpheight = 0; + uint16 tmpformat = 0; + uint8 *tmptex= NULL; + int untiled_width = 0, untiled_height = 0; + uint16 destformat = 0; + + /* Rice hi-res textures: begin + */ + uint32 chksum = 0, fmt = 0, siz = 0, palchksum = 0; + char *pfname = NULL, fname[MAX_PATH]; + std::string ident; + FILE *fp = NULL; + + wcstombs(fname, _ident.c_str(), MAX_PATH); + /* XXX case sensitivity fiasco! 
+ * files must use _a, _rgb, _all, _allciByRGBA, _ciByRGBA, _ci + * and file extensions must be in lower case letters! */ +#ifdef WIN32 + { + unsigned int i; + for (i = 0; i < strlen(fname); i++) fname[i] = tolower(fname[i]); + } +#endif + ident.assign(fname); + + /* read in Rice's file naming convention */ +#define CRCFMTSIZ_LEN 13 +#define PALCRC_LEN 9 + wcstombs(fname, it->path().leaf().c_str(), MAX_PATH); + /* XXX case sensitivity fiasco! + * files must use _a, _rgb, _all, _allciByRGBA, _ciByRGBA, _ci + * and file extensions must be in lower case letters! */ +#ifdef WIN32 + { + unsigned int i; + for (i = 0; i < strlen(fname); i++) fname[i] = tolower(fname[i]); + } +#endif + pfname = fname + strlen(fname) - 4; + if (!(pfname == strstr(fname, ".png") || + pfname == strstr(fname, ".bmp") || + pfname == strstr(fname, ".dds"))) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: not png or bmp or dds!\n"); + continue; + } + pfname = strstr(fname, ident.c_str()); + if (pfname != fname) pfname = 0; + if (pfname) { + if (sscanf(pfname + ident.size(), "#%08X#%01X#%01X#%08X", &chksum, &fmt, &siz, &palchksum) == 4) + pfname += (ident.size() + CRCFMTSIZ_LEN + PALCRC_LEN); + else if (sscanf(pfname + ident.size(), "#%08X#%01X#%01X", &chksum, &fmt, &siz) == 3) + pfname += (ident.size() + CRCFMTSIZ_LEN); + else + pfname = 0; + } + if (!pfname) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: not Rice texture naming convention!\n"); + continue; + } + if (!chksum) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: crc32 = 0!\n"); + continue; + } + + /* check if we already have it in hires texture cache */ + if 
(!replace) { + uint64 chksum64 = (uint64)palchksum; + chksum64 <<= 32; + chksum64 |= (uint64)chksum; + if (TxCache::is_cached(chksum64)) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: already cached! duplicate texture!\n"); + continue; + } + } + + DBG_INFO(80, L"rom: %ls chksum:%08X %08X fmt:%x size:%x\n", _ident.c_str(), chksum, palchksum, fmt, siz); + + /* Deal with the wackiness some texture packs utilize Rice format. + * Read in the following order: _a.* + _rgb.*, _all.png _ciByRGBA.png, + * _allciByRGBA.png, and _ci.bmp. PNG are prefered over BMP. + * + * For some reason there are texture packs that include them all. Some + * even have RGB textures named as _all.* and ARGB textures named as + * _rgb.*... Someone pleeeez write a GOOD guideline for the texture + * designers!!! + * + * We allow hires textures to have higher bpp than the N64 originals. + */ + /* N64 formats + * Format: 0 - RGBA, 1 - YUV, 2 - CI, 3 - IA, 4 - I + * Size: 0 - 4bit, 1 - 8bit, 2 - 16bit, 3 - 32 bit + */ + + /* + * read in _rgb.* and _a.* + */ + if (pfname == strstr(fname, "_rgb.") || pfname == strstr(fname, "_a.")) { + strcpy(pfname, "_rgb.png"); + if (!boost::filesystem::exists(fname)) { + strcpy(pfname, "_rgb.bmp"); + if (!boost::filesystem::exists(fname)) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: missing _rgb.*! 
_a.* must be paired with _rgb.*!\n"); + continue; + } + } + /* _a.png */ + strcpy(pfname, "_a.png"); + if ((fp = fopen(fname, "rb")) != NULL) { + tmptex = _txImage->readPNG(fp, &tmpwidth, &tmpheight, &tmpformat); + fclose(fp); + } + if (!tmptex) { + /* _a.bmp */ + strcpy(pfname, "_a.bmp"); + if ((fp = fopen(fname, "rb")) != NULL) { + tmptex = _txImage->readBMP(fp, &tmpwidth, &tmpheight, &tmpformat); + fclose(fp); + } + } + /* _rgb.png */ + strcpy(pfname, "_rgb.png"); + if ((fp = fopen(fname, "rb")) != NULL) { + tex = _txImage->readPNG(fp, &width, &height, &format); + fclose(fp); + } + if (!tex) { + /* _rgb.bmp */ + strcpy(pfname, "_rgb.bmp"); + if ((fp = fopen(fname, "rb")) != NULL) { + tex = _txImage->readBMP(fp, &width, &height, &format); + fclose(fp); + } + } + if (tmptex) { + /* check if _rgb.* and _a.* have matching size and format. */ + if (!tex || width != tmpwidth || height != tmpheight || + format != GR_TEXFMT_ARGB_8888 || tmpformat != GR_TEXFMT_ARGB_8888) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + if (!tex) { + INFO(80, L"Error: missing _rgb.*!\n"); + } else if (width != tmpwidth || height != tmpheight) { + INFO(80, L"Error: _rgb.* and _a.* have mismatched width or height!\n"); + } else if (format != GR_TEXFMT_ARGB_8888 || tmpformat != GR_TEXFMT_ARGB_8888) { + INFO(80, L"Error: _rgb.* or _a.* not in 32bit color!\n"); + } + if (tex) free(tex); + if (tmptex) free(tmptex); + tex = NULL; + tmptex = NULL; + continue; + } + } + /* make adjustments */ + if (tex) { + if (tmptex) { + /* merge (A)RGB and A comp */ + DBG_INFO(80, L"merge (A)RGB and A comp\n"); + int i; + for (i = 0; i < height * width; i++) { +#if 1 + /* use R comp for alpha. this is what Rice uses. sigh... 
*/ + ((uint32*)tex)[i] &= 0x00ffffff; + ((uint32*)tex)[i] |= ((((uint32*)tmptex)[i] & 0x00ff0000) << 8); +#endif +#if 0 + /* use libpng style grayscale conversion */ + uint32 texel = ((uint32*)tmptex)[i]; + uint32 acomp = (((texel >> 16) & 0xff) * 6969 + + ((texel >> 8) & 0xff) * 23434 + + ((texel ) & 0xff) * 2365) / 32768; + ((uint32*)tex)[i] = (acomp << 24) | (((uint32*)tex)[i] & 0x00ffffff); +#endif +#if 0 + /* use the standard NTSC gray scale conversion */ + uint32 texel = ((uint32*)tmptex)[i]; + uint32 acomp = (((texel >> 16) & 0xff) * 299 + + ((texel >> 8) & 0xff) * 587 + + ((texel ) & 0xff) * 114) / 1000; + ((uint32*)tex)[i] = (acomp << 24) | (((uint32*)tex)[i] & 0x00ffffff); +#endif + } + free(tmptex); + tmptex = NULL; + } else { + /* clobber A comp. never a question of alpha. only RGB used. */ +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Warning: missing _a.*! only using _rgb.*. 
treat as opaque texture.\n"); + int i; + for (i = 0; i < height * width; i++) { + ((uint32*)tex)[i] |= 0xff000000; + } + } + } + } else + + /* + * read in _all.png, _all.dds, _allciByRGBA.png, _allciByRGBA.dds + * _ciByRGBA.png, _ciByRGBA.dds, _ci.bmp + */ + if (pfname == strstr(fname, "_all.png") || + pfname == strstr(fname, "_all.dds") || +#ifdef WIN32 + pfname == strstr(fname, "_allcibyrgba.png") || + pfname == strstr(fname, "_allcibyrgba.dds") || + pfname == strstr(fname, "_cibyrgba.png") || + pfname == strstr(fname, "_cibyrgba.dds") || +#else + pfname == strstr(fname, "_allciByRGBA.png") || + pfname == strstr(fname, "_allciByRGBA.dds") || + pfname == strstr(fname, "_ciByRGBA.png") || + pfname == strstr(fname, "_ciByRGBA.dds") || +#endif + pfname == strstr(fname, "_ci.bmp")) { + if ((fp = fopen(fname, "rb")) != NULL) { + if (strstr(fname, ".png")) tex = _txImage->readPNG(fp, &width, &height, &format); + else if (strstr(fname, ".dds")) tex = _txImage->readDDS(fp, &width, &height, &format); + else tex = _txImage->readBMP(fp, &width, &height, &format); + fclose(fp); + } + /* XXX: auto-adjustment of dxt dds textures unsupported for now */ + if (tex && strstr(fname, ".dds")) { + const float aspectratio = (width > height) ? 
(float)width/(float)height : (float)height/(float)width; + if (!(aspectratio == 1.0 || + aspectratio == 2.0 || + aspectratio == 4.0 || + aspectratio == 8.0)) { + free(tex); + tex = NULL; +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: W:H aspect ratio range not 8:1 - 1:8!\n"); + continue; + } + if (width != _txReSample->nextPow2(width) || + height != _txReSample->nextPow2(height)) { + free(tex); + tex = NULL; +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: not power of 2 size!\n"); + continue; + } + } + } + + /* if we do not have a texture at this point we are screwed */ + if (!tex) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: load failed!\n"); + continue; + } + DBG_INFO(80, L"read in as %d x %d gfmt:%x\n", tmpwidth, tmpheight, tmpformat); + + /* check if size and format are OK */ + if (!(format == GR_TEXFMT_ARGB_8888 || + format == GR_TEXFMT_P_8 || + format == GR_TEXFMT_ARGB_CMP_DXT1 || + format == GR_TEXFMT_ARGB_CMP_DXT3 || + format == GR_TEXFMT_ARGB_CMP_DXT5) || + (width * height) < 4) { /* TxQuantize requirement: width * height must be 4 or larger. */ + free(tex); + tex = NULL; +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + INFO(80, L"Error: not width * height > 4 or 8bit palette color or 32bpp or dxt1 or dxt3 or dxt5!\n"); + continue; + } + + /* analyze and determine best format to quantize */ + if (format == GR_TEXFMT_ARGB_8888) { + int i; + int alphabits = 0; + int fullalpha = 0; + boolean intensity = 1; + + if (!(_options & LET_TEXARTISTS_FLY)) { + /* HACK ALERT! 
*/ + /* Account for Rice's weirdness with fmt:0 siz:2 textures. + * Although the conditions are relaxed with other formats, + * the D3D RGBA5551 surface is used for this format in certain + * cases. See Nintemod's SuperMario64 life gauge and power + * meter. The same goes for fmt:2 textures. See Mollymutt's + * PaperMario text. */ + if ((fmt == 0 && siz == 2) || fmt == 2) { + DBG_INFO(80, L"Remove black, white, etc borders along the alpha edges.\n"); + /* round A comp */ + for (i = 0; i < height * width; i++) { + uint32 texel = ((uint32*)tex)[i]; + ((uint32*)tex)[i] = ((texel & 0xff000000) == 0xff000000 ? 0xff000000 : 0) | + (texel & 0x00ffffff); + } + /* Substitute texel color with the average of the surrounding + * opaque texels. This removes borders regardless of hardware + * texture filtering (bilinear, etc). */ + int j; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + uint32 texel = ((uint32*)tex)[i * width + j]; + if ((texel & 0xff000000) != 0xff000000) { + uint32 tmptexel[8]; + uint32 k, numtexel, r, g, b; + numtexel = r = g = b = 0; + memset(&tmptexel, 0, sizeof(tmptexel)); + if (i > 0) { + tmptexel[0] = ((uint32*)tex)[(i - 1) * width + j]; /* north */ + if (j > 0) tmptexel[1] = ((uint32*)tex)[(i - 1) * width + j - 1]; /* north-west */ + if (j < width - 1) tmptexel[2] = ((uint32*)tex)[(i - 1) * width + j + 1]; /* north-east */ + } + if (i < height - 1) { + tmptexel[3] = ((uint32*)tex)[(i + 1) * width + j]; /* south */ + if (j > 0) tmptexel[4] = ((uint32*)tex)[(i + 1) * width + j - 1]; /* south-west */ + if (j < width - 1) tmptexel[5] = ((uint32*)tex)[(i + 1) * width + j + 1]; /* south-east */ + } + if (j > 0) tmptexel[6] = ((uint32*)tex)[i * width + j - 1]; /* west */ + if (j < width - 1) tmptexel[7] = ((uint32*)tex)[i * width + j + 1]; /* east */ + for (k = 0; k < 8; k++) { + if ((tmptexel[k] & 0xff000000) == 0xff000000) { + r += ((tmptexel[k] & 0x00ff0000) >> 16); + g += ((tmptexel[k] & 0x0000ff00) >> 8); + b += ((tmptexel[k] & 
0x000000ff) ); + numtexel++; + } + } + if (numtexel) { + ((uint32*)tex)[i * width + j] = ((r / numtexel) << 16) | + ((g / numtexel) << 8) | + ((b / numtexel) ); + } else { + ((uint32*)tex)[i * width + j] = texel & 0x00ffffff; + } + } + } + } + } + } + + /* simple analysis of texture */ + for (i = 0; i < height * width; i++) { + uint32 texel = ((uint32*)tex)[i]; + if (alphabits != 8) { +#if AGGRESSIVE_QUANTIZATION + if ((texel & 0xff000000) < 0x00000003) { + alphabits = 1; + fullalpha++; + } else if ((texel & 0xff000000) < 0xfe000000) { + alphabits = 8; + } +#else + if ((texel & 0xff000000) == 0x00000000) { + alphabits = 1; + fullalpha++; + } else if ((texel & 0xff000000) != 0xff000000) { + alphabits = 8; + } +#endif + } + if (intensity) { + int rcomp = (texel >> 16) & 0xff; + int gcomp = (texel >> 8) & 0xff; + int bcomp = (texel ) & 0xff; +#if AGGRESSIVE_QUANTIZATION + if (abs(rcomp - gcomp) > 8 || abs(rcomp - bcomp) > 8 || abs(gcomp - bcomp) > 8) intensity = 0; +#else + if (rcomp != gcomp || rcomp != bcomp || gcomp != bcomp) intensity = 0; +#endif + } + if (!intensity && alphabits == 8) break; + } + DBG_INFO(80, L"required alpha bits:%d zero acomp texels:%d rgb as intensity:%d\n", alphabits, fullalpha, intensity); + + /* preparations based on above analysis */ +#if !REDUCE_TEXTURE_FOOTPRINT + if (_maxbpp < 32 || _options & (FORCE16BPP_HIRESTEX|COMPRESSION_MASK)) { +#endif + if (alphabits == 0) destformat = GR_TEXFMT_RGB_565; + else if (alphabits == 1) destformat = GR_TEXFMT_ARGB_1555; + else destformat = GR_TEXFMT_ARGB_8888; +#if !REDUCE_TEXTURE_FOOTPRINT + } else { + destformat = GR_TEXFMT_ARGB_8888; + } +#endif + if (fmt == 4 && alphabits == 0) { + destformat = GR_TEXFMT_ARGB_8888; + /* Rice I format; I = (R + G + B) / 3 */ + for (i = 0; i < height * width; i++) { + uint32 texel = ((uint32*)tex)[i]; + uint32 icomp = (((texel >> 16) & 0xff) + + ((texel >> 8) & 0xff) + + ((texel ) & 0xff)) / 3; + ((uint32*)tex)[i] = (icomp << 24) | (texel & 0x00ffffff); + } + } + 
if (intensity) { + if (alphabits == 0) { + if (fmt == 4) destformat = GR_TEXFMT_ALPHA_8; + else destformat = GR_TEXFMT_INTENSITY_8; + } else { + destformat = GR_TEXFMT_ALPHA_INTENSITY_88; + } + } + + DBG_INFO(80, L"best gfmt:%x\n", destformat); + } + /* + * Rice hi-res textures: end */ + + + /* XXX: only ARGB8888 for now. comeback to this later... */ + if (format == GR_TEXFMT_ARGB_8888) { + +#if TEXTURE_TILING + + /* Glide64 style texture tiling */ + /* NOTE: narrow wide textures can be tiled into 256x256 size textures */ + + /* adjust texture size to allow tiling for V1, Rush, V2, Banshee, V3 */ + /* NOTE: we skip this for palette textures that need minification + * becasue it will look ugly. */ + + /* minification */ + { + int ratio = 1; + + /* minification to enable glide64 style texture tiling */ + /* determine the minification ratio to tile the texture into 256x256 size */ + if ((_options & TILE_HIRESTEX) && _maxwidth >= 256 && _maxheight >= 256) { + DBG_INFO(80, L"determine minification ratio to tile\n"); + tmpwidth = width; + tmpheight = height; + if (height > 256) { + ratio = ((height - 1) >> 8) + 1; + tmpwidth = width / ratio; + tmpheight = height / ratio; + DBG_INFO(80, L"height > 256, minification ratio:%d %d x %d -> %d x %d\n", + ratio, width, height, tmpwidth, tmpheight); + } + if (tmpwidth > 256 && (((tmpwidth - 1) >> 8) + 1) * tmpheight > 256) { + ratio *= ((((((tmpwidth - 1) >> 8) + 1) * tmpheight) - 1) >> 8) + 1; + DBG_INFO(80, L"width > 256, minification ratio:%d %d x %d -> %d x %d\n", + ratio, width, height, width / ratio, height / ratio); + } + } else { + /* normal minification to fit max texture size */ + if (width > _maxwidth || height > _maxheight) { + DBG_INFO(80, L"determine minification ratio to fit max texture size\n"); + tmpwidth = width; + tmpheight = height; + while (tmpwidth > _maxwidth) { + tmpheight >>= 1; + tmpwidth >>= 1; + ratio <<= 1; + } + while (tmpheight > _maxheight) { + tmpheight >>= 1; + tmpwidth >>= 1; + ratio <<= 1; + } 
+ DBG_INFO(80, L"minification ratio:%d %d x %d -> %d x %d\n", + ratio, width, height, tmpwidth, tmpheight); + } + } + + if (ratio > 1) { + if (!_txReSample->minify(&tex, &width, &height, ratio)) { + free(tex); + tex = NULL; + DBG_INFO(80, L"Error: minification failed!\n"); + continue; + } + } + } + + /* tiling */ + if ((_options & TILE_HIRESTEX) && _maxwidth >= 256 && _maxheight >= 256) { + boolean usetile = 0; + + /* to tile or not to tile, that is the question */ + if (width > 256 && height <= 128 && (((width - 1) >> 8) + 1) * height <= 256) { + + if (width > _maxwidth) usetile = 1; + else { + /* tile if the tiled texture memory footprint is smaller */ + int tilewidth = 256; + int tileheight = _txReSample->nextPow2((((width - 1) >> 8) + 1) * height); + tmpwidth = width; + tmpheight = height; + + /* 3dfx Glide3 tmpheight, W:H aspect ratio range (8:1 - 1:8) */ + if (tilewidth > (tileheight << 3)) tileheight = tilewidth >> 3; + + /* HACKALERT: see TxReSample::pow2(); */ + if (tmpwidth > 64) tmpwidth -= 4; + else if (tmpwidth > 16) tmpwidth -= 2; + else if (tmpwidth > 4) tmpwidth -= 1; + + if (tmpheight > 64) tmpheight -= 4; + else if (tmpheight > 16) tmpheight -= 2; + else if (tmpheight > 4) tmpheight -= 1; + + tmpwidth = _txReSample->nextPow2(tmpwidth); + tmpheight = _txReSample->nextPow2(tmpheight); + + /* 3dfx Glide3 tmpheight, W:H aspect ratio range (8:1 - 1:8) */ + if (tmpwidth > tmpheight) { + if (tmpwidth > (tmpheight << 3)) tmpheight = tmpwidth >> 3; + } else { + if (tmpheight > (tmpwidth << 3)) tmpwidth = tmpheight >> 3; + } + + usetile = (tilewidth * tileheight < tmpwidth * tmpheight); + } + + } + + /* tile it! 
do the actual tiling into 256x256 size */ + if (usetile) { + DBG_INFO(80, L"Glide64 style texture tiling\n"); + + int x, y, z, ratio, offset; + offset = 0; + ratio = ((width - 1) >> 8) + 1; + tmptex = (uint8 *)malloc(_txUtil->sizeofTx(256, height * ratio, format)); + if (tmptex) { + for (x = 0; x < ratio; x++) { + for (y = 0; y < height; y++) { + if (x < ratio - 1) { + memcpy(&tmptex[offset << 2], &tex[(x * 256 + y * width) << 2], 256 << 2); + } else { + for (z = 0; z < width - 256 * (ratio - 1); z++) { + ((uint32*)tmptex)[offset + z] = ((uint32*)tex)[x * 256 + y * width + z]; + } + for (; z < 256; z++) { + ((uint32*)tmptex)[offset + z] = ((uint32*)tmptex)[offset + z - 1]; + } + } + offset += 256; + } + } + free(tex); + tex = tmptex; + untiled_width = width; + untiled_height = height; + width = 256; + height *= ratio; + DBG_INFO(80, L"Tiled: %d x %d -> %d x %d\n", untiled_width, untiled_height, width, height); + } + } + } + +#else /* TEXTURE_TILING */ + + /* minification */ + if (width > _maxwidth || height > _maxheight) { + int ratio = 1; + if (width / _maxwidth > height / _maxheight) { + ratio = (int)ceil((double)width / _maxwidth); + } else { + ratio = (int)ceil((double)height / _maxheight); + } + if (!_txReSample->minify(&tex, &width, &height, ratio)) { + free(tex); + tex = NULL; + DBG_INFO(80, L"Error: minification failed!\n"); + continue; + } + } + +#endif /* TEXTURE_TILING */ + + /* texture compression */ + if ((_options & COMPRESSION_MASK) && + (width >= 64 && height >= 64) /* Texture compression is not suitable for low pixel coarse detail + * textures. The assumption here is that textures larger than 64x64 + * have enough detail to produce decent quality when compressed. The + * down side is that narrow stripped textures that the N64 often use + * for large background textures are also ignored. It would be more + * reasonable if decisions are made based on fourier-transform + * spectrum or RMS error. 
+ * + * NOTE: texture size must be checked before expanding to pow2 size. + */ + ) { + uint32 alpha = 0; + int dataSize = 0; + int compressionType = _options & COMPRESSION_MASK; + +#if POW2_TEXTURES +#if (POW2_TEXTURES == 2) + /* 3dfx Glide3x aspect ratio (8:1 - 1:8) */ + if (!_txReSample->nextPow2(&tex, &width , &height, 32, 1)) { +#else + /* normal pow2 expansion */ + if (!_txReSample->nextPow2(&tex, &width , &height, 32, 0)) { +#endif + free(tex); + tex = NULL; + DBG_INFO(80, L"Error: aspect ratio adjustment failed!\n"); + continue; + } +#endif + + switch (_options & COMPRESSION_MASK) { + case S3TC_COMPRESSION: + switch (destformat) { + case GR_TEXFMT_ARGB_8888: +#if GLIDE64_DXTN + case GR_TEXFMT_ARGB_1555: /* for ARGB1555 use DXT5 instead of DXT1 */ +#endif + case GR_TEXFMT_ALPHA_INTENSITY_88: + dataSize = width * height; + break; +#if !GLIDE64_DXTN + case GR_TEXFMT_ARGB_1555: +#endif + case GR_TEXFMT_RGB_565: + case GR_TEXFMT_INTENSITY_8: + dataSize = (width * height) >> 1; + break; + case GR_TEXFMT_ALPHA_8: /* no size benefit with dxtn */ + ; + } + break; + case FXT1_COMPRESSION: + switch (destformat) { + case GR_TEXFMT_ARGB_1555: + case GR_TEXFMT_RGB_565: + case GR_TEXFMT_INTENSITY_8: + dataSize = (width * height) >> 1; + break; + /* XXX: textures that use 8bit alpha channel look bad with the current + * fxt1 library, so we substitute it with dxtn for now. afaik all gfx + * cards that support fxt1 also support dxtn. (3dfx and Intel) */ + case GR_TEXFMT_ALPHA_INTENSITY_88: + case GR_TEXFMT_ARGB_8888: + compressionType = S3TC_COMPRESSION; + dataSize = width * height; + break; + case GR_TEXFMT_ALPHA_8: /* no size benefit with dxtn */ + ; + } + } + /* compress it! 
*/ + if (dataSize) { +#if 0 /* TEST: dither before compression for better results with gradients */ + tmptex = (uint8 *)malloc(_txUtil->sizeofTx(width, height, destformat)); + if (tmptex) { + if (_txQuantize->quantize(tex, tmptex, width, height, GR_TEXFMT_ARGB_8888, destformat, 0)) + _txQuantize->quantize(tmptex, tex, width, height, destformat, GR_TEXFMT_ARGB_8888, 0); + free(tmptex); + } +#endif + tmptex = (uint8 *)malloc(dataSize); + if (tmptex) { + if (_txQuantize->compress(tex, tmptex, + width, height, destformat, + &tmpwidth, &tmpheight, &tmpformat, + compressionType)) { + free(tex); + tex = tmptex; + width = tmpwidth; + height = tmpheight; + format = destformat = tmpformat; + } else { + free(tmptex); + } + } + } + + } else { + +#if POW2_TEXTURES +#if (POW2_TEXTURES == 2) + /* 3dfx Glide3x aspect ratio (8:1 - 1:8) */ + if (!_txReSample->nextPow2(&tex, &width , &height, 32, 1)) { +#else + /* normal pow2 expansion */ + if (!_txReSample->nextPow2(&tex, &width , &height, 32, 0)) { +#endif + free(tex); + tex = NULL; + DBG_INFO(80, L"Error: aspect ratio adjustment failed!\n"); + continue; + } +#endif + } + + /* quantize */ + { + tmptex = (uint8 *)malloc(_txUtil->sizeofTx(width, height, destformat)); + if (tmptex) { + switch (destformat) { + case GR_TEXFMT_ARGB_8888: + case GR_TEXFMT_ARGB_4444: +#if !REDUCE_TEXTURE_FOOTPRINT + if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX) +#endif + destformat = GR_TEXFMT_ARGB_4444; + break; + case GR_TEXFMT_ARGB_1555: +#if !REDUCE_TEXTURE_FOOTPRINT + if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX) +#endif + destformat = GR_TEXFMT_ARGB_1555; + break; + case GR_TEXFMT_RGB_565: +#if !REDUCE_TEXTURE_FOOTPRINT + if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX) +#endif + destformat = GR_TEXFMT_RGB_565; + break; + case GR_TEXFMT_ALPHA_INTENSITY_88: + case GR_TEXFMT_ALPHA_INTENSITY_44: +#if !REDUCE_TEXTURE_FOOTPRINT + destformat = GR_TEXFMT_ALPHA_INTENSITY_88; +#else + destformat = GR_TEXFMT_ALPHA_INTENSITY_44; +#endif + break; 
+ case GR_TEXFMT_ALPHA_8: + destformat = GR_TEXFMT_ALPHA_8; /* yes, this is correct. ALPHA_8 instead of INTENSITY_8 */ + break; + case GR_TEXFMT_INTENSITY_8: + destformat = GR_TEXFMT_INTENSITY_8; + } + if (_txQuantize->quantize(tex, tmptex, width, height, GR_TEXFMT_ARGB_8888, destformat, 0)) { + format = destformat; + free(tex); + tex = tmptex; + } + } + } + + } + + + /* last minute validations */ + if (!tex || !chksum || !width || !height || !format || width > _maxwidth || height > _maxheight) { +#if !DEBUG + INFO(80, L"-----\n"); + INFO(80, L"path: %ls\n", dir_path.string().c_str()); + INFO(80, L"file: %ls\n", it->path().leaf().c_str()); +#endif + if (tex) { + free(tex); + tex = NULL; + INFO(80, L"Error: bad format or size! %d x %d gfmt:%x\n", width, height, format); + } else { + INFO(80, L"Error: load failed!!\n"); + } + continue; + } + + /* load it into hires texture cache. */ + { + uint64 chksum64 = (uint64)palchksum; + chksum64 <<= 32; + chksum64 |= (uint64)chksum; + + GHQTexInfo tmpInfo; + memset(&tmpInfo, 0, sizeof(GHQTexInfo)); + + tmpInfo.data = tex; + tmpInfo.width = width; + tmpInfo.height = height; + tmpInfo.format = format; + tmpInfo.largeLodLog2 = _txUtil->grLodLog2(width, height); + tmpInfo.smallLodLog2 = tmpInfo.largeLodLog2; + tmpInfo.aspectRatioLog2 = _txUtil->grAspectRatioLog2(width, height); + tmpInfo.is_hires_tex = 1; + +#if TEXTURE_TILING + /* Glide64 style texture tiling. */ + if (untiled_width && untiled_height) { + tmpInfo.tiles = ((untiled_width - 1) >> 8) + 1; + tmpInfo.untiled_width = untiled_width; + tmpInfo.untiled_height = untiled_height; + } +#endif + + /* remove redundant in cache */ + if (replace && TxCache::del(chksum64)) { + DBG_INFO(80, L"removed duplicate old cache.\n"); + } + + /* add to cache */ + if (TxCache::add(chksum64, &tmpInfo)) { + /* Callback to display hires texture info. 
+ * Gonetz */ + if (_callback) { + wchar_t tmpbuf[MAX_PATH]; + mbstowcs(tmpbuf, fname, MAX_PATH); + (*_callback)(L"[%d] total mem:%.2fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, tmpbuf); + } + DBG_INFO(80, L"texture loaded!\n"); + } + free(tex); + } + + } + + CHDIR(curpath); +#endif + return 1; +} diff --git a/Source/GlideHQ/TxHiResCache.h b/Source/GlideHQ/TxHiResCache.h new file mode 100644 index 000000000..58868ffa6 --- /dev/null +++ b/Source/GlideHQ/TxHiResCache.h @@ -0,0 +1,59 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#ifndef __TXHIRESCACHE_H__
+#define __TXHIRESCACHE_H__
+
+/* support hires textures
+ * 0: disable
+ * 1: enable
+ */
+#define HIRES_TEXTURE 1
+
+#include "TxCache.h"
+#include "TxQuantize.h"
+#include "TxImage.h"
+#include "TxReSample.h"
+/* Cache of replacement ("hi-res") textures, built on top of TxCache.
+ * loadHiResTextures() post-processes each loaded image (resize, optional
+ * compression, quantization) and stores the result through TxCache::add(). */
+class TxHiResCache : public TxCache
+{
+private:
+  int _maxwidth;   /* textures wider than this are minified or rejected by the loader */
+  int _maxheight;  /* textures taller than this are minified or rejected by the loader */
+  int _maxbpp;     /* below 32 the loader quantizes to the 16 bit formats */
+  boolean _haveCache;  /* NOTE(review): not referenced in the visible code - presumably "a cache was loaded"; confirm */
+  boolean _abortLoad;  /* NOTE(review): not referenced in the visible code - presumably a user-abort flag; confirm */
+  TxImage *_txImage;        /* image file reader */
+  TxQuantize *_txQuantize;  /* used by the loader for quantize() and compress() */
+  TxReSample *_txReSample;  /* used by the loader for minify() and nextPow2() */
+  boolean loadHiResTextures(LPCSTR dir_path, boolean replace);  /* replace != 0: an existing entry with the same checksum is deleted before the new one is added */
+public:
+  ~TxHiResCache();
+  TxHiResCache(int maxwidth, int maxheight, int maxbpp, int options,
+               const wchar_t *path, const wchar_t *ident,
+               dispInfoFuncExt callback);
+  boolean empty();
+  boolean load(boolean replace);
+};
+
+#endif /* __TXHIRESCACHE_H__ */
diff --git a/Source/GlideHQ/TxImage.cpp b/Source/GlideHQ/TxImage.cpp
new file mode 100644
index 000000000..ba087d862
--- /dev/null
+++ b/Source/GlideHQ/TxImage.cpp
@@ -0,0 +1,799 @@
+/*
+ * Texture Filtering
+ * Version: 1.0
+ *
+ * Copyright (C) 2007 Hiroshi Morii All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* use power of 2 texture size
+ * (0:disable, 1:enable, 2:3dfx) */
+#define POW2_TEXTURES 0
+
+/* check 8 bytes. 
use a larger value if needed. */ +#define PNG_CHK_BYTES 8 + +#include "TxImage.h" +#include "TxReSample.h" +#include "TxDbg.h" +#include + +boolean +TxImage::getPNGInfo(FILE *fp, png_structp *png_ptr, png_infop *info_ptr) +{ + unsigned char sig[PNG_CHK_BYTES]; + + /* check for valid file pointer */ + if (!fp) + return 0; + + /* check if file is PNG */ + if (fread(sig, 1, PNG_CHK_BYTES, fp) != PNG_CHK_BYTES) + return 0; + + if (png_sig_cmp(sig, 0, PNG_CHK_BYTES) != 0) + return 0; + + /* get PNG file info */ + *png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (!*png_ptr) + return 0; + + *info_ptr = png_create_info_struct(*png_ptr); + if (!*info_ptr) { + png_destroy_read_struct(png_ptr, NULL, NULL); + return 0; + } + + if (setjmp(png_jmpbuf(*png_ptr))) { + DBG_INFO(80, L"error reading png!\n"); + png_destroy_read_struct(png_ptr, info_ptr, NULL); + return 0; + } + + png_init_io(*png_ptr, fp); + png_set_sig_bytes(*png_ptr, PNG_CHK_BYTES); + png_read_info(*png_ptr, *info_ptr); + + return 1; +} + +uint8* +TxImage::readPNG(FILE* fp, int* width, int* height, uint16* format) +{ + /* NOTE: returned image format is GR_TEXFMT_ARGB_8888 */ + + png_structp png_ptr; + png_infop info_ptr; + uint8 *image = NULL; + int bit_depth, color_type, interlace_type, compression_type, filter_type, + row_bytes, o_width, o_height, num_pas; + + /* initialize */ + *width = 0; + *height = 0; + *format = 0; + + /* check if we have a valid png file */ + if (!fp) + return NULL; + + if (!getPNGInfo(fp, &png_ptr, &info_ptr)) { + INFO(80, L"error reading png file! 
png image is corrupt.\n"); + return NULL; + } + + png_get_IHDR(png_ptr, info_ptr, + (png_uint_32*)&o_width, (png_uint_32*)&o_height, &bit_depth, &color_type, + &interlace_type, &compression_type, &filter_type); + + DBG_INFO(80, L"png format %d x %d bitdepth:%d color:%x interlace:%x compression:%x filter:%x\n", + o_width, o_height, bit_depth, color_type, + interlace_type, compression_type, filter_type); + + /* transformations */ + + /* Rice hi-res textures + * _all.png + * _rgb.png, _a.png + * _ciByRGBA.png + * _allciByRGBA.png + */ + + /* strip if color channel is larger than 8 bits */ + if (bit_depth > 8) { + png_set_strip_16(png_ptr); + bit_depth = 8; + } + +#if 1 + /* These are not really required per Rice format spec, + * but is done just in case someone uses them. + */ + /* convert palette color to rgb color */ + if (color_type == PNG_COLOR_TYPE_PALETTE) { + png_set_palette_to_rgb(png_ptr); + color_type = PNG_COLOR_TYPE_RGB; + } + + /* expand 1,2,4 bit gray scale to 8 bit gray scale */ + if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) + png_set_expand_gray_1_2_4_to_8(png_ptr); + + /* convert gray scale or gray scale + alpha to rgb color */ + if (color_type == PNG_COLOR_TYPE_GRAY || + color_type == PNG_COLOR_TYPE_GRAY_ALPHA) { + png_set_gray_to_rgb(png_ptr); + color_type = PNG_COLOR_TYPE_RGB; + } +#endif + + /* add alpha channel if any */ + if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { + png_set_tRNS_to_alpha(png_ptr); + color_type = PNG_COLOR_TYPE_RGB_ALPHA; + } + + /* convert rgb to rgba */ + if (color_type == PNG_COLOR_TYPE_RGB) { + png_set_filler(png_ptr, 0xff, PNG_FILLER_AFTER); + color_type = PNG_COLOR_TYPE_RGB_ALPHA; + } + + /* punt invalid formats */ + if (color_type != PNG_COLOR_TYPE_RGB_ALPHA) { + png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + DBG_INFO(80, L"Error: not PNG_COLOR_TYPE_RGB_ALPHA format!\n"); + return NULL; + } + + /*png_color_8p sig_bit; + if (png_get_sBIT(png_ptr, info_ptr, &sig_bit)) + png_set_shift(png_ptr, 
sig_bit);*/ + + /* convert rgba to bgra */ + png_set_bgr(png_ptr); + + /* turn on interlace handling to cope with the weirdness + * of texture authors using interlaced format */ + num_pas = png_set_interlace_handling(png_ptr); + + /* update info structure */ + png_read_update_info(png_ptr, info_ptr); + + /* we only get here if ARGB8888 */ + row_bytes = png_get_rowbytes(png_ptr, info_ptr); + + /* allocate memory to read in image */ + image = (uint8*)malloc(row_bytes * o_height); + + /* read in image */ + if (image) { + int pas, i; + uint8* tmpimage; + + for (pas = 0; pas < num_pas; pas++) { /* deal with interlacing */ + tmpimage = image; + + for (i = 0; i < o_height; i++) { + /* copy row */ + png_read_rows(png_ptr, &tmpimage, NULL, 1); + tmpimage += row_bytes; + } + } + + /* read rest of the info structure */ + png_read_end(png_ptr, info_ptr); + + *width = (row_bytes >> 2); + *height = o_height; + *format = GR_TEXFMT_ARGB_8888; + +#if POW2_TEXTURES + /* next power of 2 size conversions */ + /* NOTE: I can do this in the above loop for faster operations, but some + * texture packs require a workaround. see HACKALERT in nextPow2(). + */ + + TxReSample txReSample = new TxReSample; // XXX: temporary. move to a better place. 
+ +#if (POW2_TEXTURES == 2) + if (!txReSample->nextPow2(&image, width, height, 32, 1)) { +#else + if (!txReSample->nextPow2(&image, width, height, 32, 0)) { +#endif + if (image) { + free(image); + image = NULL; + } + *width = 0; + *height = 0; + *format = 0; + } + + delete txReSample; + +#endif /* POW2_TEXTURES */ + } + + /* clean up */ + png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + +#ifdef DEBUG + if (!image) { + DBG_INFO(80, L"Error: failed to load png image!\n"); + } +#endif + + return image; +} + +boolean +TxImage::writePNG(uint8* src, FILE* fp, int width, int height, int rowStride, uint16 format, uint8 *palette) +{ + png_structp png_ptr; + png_infop info_ptr; + png_color_8 sig_bit; + png_colorp palette_ptr; + png_bytep trans_ptr;//, tex_ptr; + int bit_depth, color_type, row_bytes, num_palette; + int i; + //uint16 srcfmt, destfmt; + + if (!src || !fp) + return 0; + + png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (png_ptr == NULL) + return 0; + + info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == NULL) { + png_destroy_write_struct(&png_ptr, NULL); + return 0; + } + + /*if (setjmp(png_ptr->jmpbuf)) { + png_destroy_write_struct(&png_ptr, &info_ptr); + return 0; + }*/ + + png_init_io(png_ptr, fp); + + /* TODO: images must be converted to RGBA8888 or CI8, + * palettes need to be separated to A and RGB. 
*/ + + /* N64 formats + * Format: 0 - RGBA, 1 - YUV, 2 - CI, 3 - IA, 4 - I + * Size: 0 - 4bit, 1 - 8bit, 2 - 16bit, 3 - 32 bit + * format = (Format << 8 | Size); + */ + + /* each channel is saved in 8bits for consistency */ + switch (format) { + case 0x0002:/* RGBA5551 */ + bit_depth = 8; + sig_bit.red = 5; + sig_bit.green = 5; + sig_bit.blue = 5; + sig_bit.alpha = 1; + color_type = PNG_COLOR_TYPE_RGB_ALPHA; + break; + case 0x0003:/* RGBA8888 */ + case 0x0302:/* IA88 */ + bit_depth = 8; + sig_bit.red = 8; + sig_bit.green = 8; + sig_bit.blue = 8; + sig_bit.alpha = 8; + color_type = PNG_COLOR_TYPE_RGB_ALPHA; + break; + case 0x0300:/* IA31 */ + bit_depth = 8; + sig_bit.red = 3; + sig_bit.green = 3; + sig_bit.blue = 3; + sig_bit.alpha = 1; + color_type = PNG_COLOR_TYPE_RGB_ALPHA; + break; + case 0x0301:/* IA44 */ + bit_depth = 8; + sig_bit.red = 4; + sig_bit.green = 4; + sig_bit.blue = 4; + sig_bit.alpha = 4; + color_type = PNG_COLOR_TYPE_RGB_ALPHA; + break; + case 0x0400:/* I4 */ + bit_depth = 8; + sig_bit.red = 4; + sig_bit.green = 4; + sig_bit.blue = 4; + color_type = PNG_COLOR_TYPE_RGB; + break; + case 0x0401:/* I8 */ + case 0x0402:/* I16 */ + bit_depth = 8; + sig_bit.red = 8; + sig_bit.green = 8; + sig_bit.blue = 8; + color_type = PNG_COLOR_TYPE_RGB; + break; + case 0x0200:/* CI4 */ + bit_depth = 8; + num_palette = 16; + color_type = PNG_COLOR_TYPE_PALETTE; + break; + case 0x0201:/* CI8 */ + bit_depth = 8; + num_palette = 256; + color_type = PNG_COLOR_TYPE_PALETTE; + break; + case 0x0102:/* YUV ? 
*/ + case 0x0103: + default: + /* unsupported format */ + png_destroy_write_struct(&png_ptr, &info_ptr); + return 0; + } + + switch (color_type) { + case PNG_COLOR_TYPE_RGB_ALPHA: + case PNG_COLOR_TYPE_RGB: + //row_bytes = (bit_depth * width) >> 1; + row_bytes = rowStride; + png_set_bgr(png_ptr); + png_set_sBIT(png_ptr, info_ptr, &sig_bit); + break; + case PNG_COLOR_TYPE_PALETTE: + //row_bytes = (bit_depth * width) >> 3; + row_bytes = rowStride; + png_set_PLTE(png_ptr, info_ptr, palette_ptr, num_palette); + png_set_tRNS(png_ptr, info_ptr, trans_ptr, num_palette, 0); + } + + //png_set_filter(png_ptr, 0, PNG_ALL_FILTERS); + + //if (bit_depth == 16) + // png_set_swap(png_ptr); + + //if (bit_depth < 8) + // png_set_packswap(png_ptr); + + png_set_IHDR(png_ptr, info_ptr, width, height, + bit_depth, color_type, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); + + //png_set_gAMA(png_ptr, info_ptr, 1.0); + + png_write_info(png_ptr, info_ptr); + for (i = 0; i < height; i++) { + png_write_row(png_ptr, (png_bytep)src); + src += row_bytes; + } + png_write_end(png_ptr, info_ptr); + + png_destroy_write_struct(&png_ptr, &info_ptr); + + //if (tex_ptr) delete [] tex_ptr; + + return 1; +} + +boolean +TxImage::getBMPInfo(FILE* fp, BITMAPFILEHEADER* bmp_fhdr, BITMAPINFOHEADER* bmp_ihdr) +{ + /* + * read in BITMAPFILEHEADER + */ + + /* is this a BMP file? 
*/ + if (fread(&bmp_fhdr->bfType, 2, 1, fp) != 1) + return 0; + + if (memcmp(&bmp_fhdr->bfType, "BM", 2) != 0) + return 0; + + /* get file size */ + if (fread(&bmp_fhdr->bfSize, 4, 1, fp) != 1) + return 0; + + /* reserved 1 */ + if (fread(&bmp_fhdr->bfReserved1, 2, 1, fp) != 1) + return 0; + + /* reserved 2 */ + if (fread(&bmp_fhdr->bfReserved2, 2, 1, fp) != 1) + return 0; + + /* offset to the image data */ + if (fread(&bmp_fhdr->bfOffBits, 4, 1, fp) != 1) + return 0; + + /* + * read in BITMAPINFOHEADER + */ + + /* size of BITMAPINFOHEADER */ + if (fread(&bmp_ihdr->biSize, 4, 1, fp) != 1) + return 0; + + /* is this a Windows BMP? */ + if (bmp_ihdr->biSize != 40) + return 0; + + /* width of the bitmap in pixels */ + if (fread(&bmp_ihdr->biWidth, 4, 1, fp) != 1) + return 0; + + /* height of the bitmap in pixels */ + if (fread(&bmp_ihdr->biHeight, 4, 1, fp) != 1) + return 0; + + /* number of planes (always 1) */ + if (fread(&bmp_ihdr->biPlanes, 2, 1, fp) != 1) + return 0; + + /* number of bits-per-pixel. 
(1, 4, 8, 16, 24, 32) */ + if (fread(&bmp_ihdr->biBitCount, 2, 1, fp) != 1) + return 0; + + /* compression for a compressed bottom-up bitmap + * 0 : uncompressed format + * 1 : run-length encoded 4 bpp format + * 2 : run-length encoded 8 bpp format + * 3 : bitfield + */ + if (fread(&bmp_ihdr->biCompression, 4, 1, fp) != 1) + return 0; + + /* size of the image in bytes */ + if (fread(&bmp_ihdr->biSizeImage, 4, 1, fp) != 1) + return 0; + + /* horizontal resolution in pixels-per-meter */ + if (fread(&bmp_ihdr->biXPelsPerMeter, 4, 1, fp) != 1) + return 0; + + /* vertical resolution in pixels-per-meter */ + if (fread(&bmp_ihdr->biYPelsPerMeter, 4, 1, fp) != 1) + return 0; + + /* number of color indexes in the color table that are actually used */ + if (fread(&bmp_ihdr->biClrUsed, 4, 1, fp) != 1) + return 0; + + /* the number of color indexes that are required for displaying */ + if (fread(&bmp_ihdr->biClrImportant, 4, 1, fp) != 1) + return 0; + + return 1; +} + +uint8* +TxImage::readBMP(FILE* fp, int* width, int* height, uint16* format) +{ + /* NOTE: returned image format; + * 4, 8bit palette bmp -> GR_TEXFMT_P_8 + * 24, 32bit bmp -> GR_TEXFMT_ARGB_8888 + */ + + uint8 *image = NULL; + uint8 *image_row = NULL; + uint8 *tmpimage = NULL; + int row_bytes, pos, i, j; + /* Windows Bitmap */ + BITMAPFILEHEADER bmp_fhdr; + BITMAPINFOHEADER bmp_ihdr; + + /* initialize */ + *width = 0; + *height = 0; + *format = 0; + + /* check if we have a valid bmp file */ + if (!fp) + return NULL; + + if (!getBMPInfo(fp, &bmp_fhdr, &bmp_ihdr)) { + INFO(80, L"error reading bitmap file! 
bitmap image is corrupt.\n"); + return NULL; + } + + DBG_INFO(80, L"bmp format %d x %d bitdepth:%d compression:%x offset:%d\n", + bmp_ihdr.biWidth, bmp_ihdr.biHeight, bmp_ihdr.biBitCount, + bmp_ihdr.biCompression, bmp_fhdr.bfOffBits); + + /* rowStride in bytes */ + row_bytes = (bmp_ihdr.biWidth * bmp_ihdr.biBitCount) >> 3; + /* align to 4bytes boundary */ + row_bytes = (row_bytes + 3) & ~3; + + /* Rice hi-res textures */ + if (!(bmp_ihdr.biBitCount == 8 || bmp_ihdr.biBitCount == 4 || bmp_ihdr.biBitCount == 32 || bmp_ihdr.biBitCount == 24) || + bmp_ihdr.biCompression != 0) { + DBG_INFO(80, L"Error: incompatible bitmap format!\n"); + return NULL; + } + + switch (bmp_ihdr.biBitCount) { + case 8: + case 32: + /* 8 bit, 32 bit bitmap */ + image = (uint8*)malloc(row_bytes * bmp_ihdr.biHeight); + if (image) { + tmpimage = image; + pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1); + for (i = 0; i < bmp_ihdr.biHeight; i++) { + /* read in image */ + fseek(fp, pos, SEEK_SET); + fread(tmpimage, row_bytes, 1, fp); + tmpimage += row_bytes; + pos -= row_bytes; + } + } + break; + case 4: + /* 4bit bitmap */ + image = (uint8*)malloc((row_bytes * bmp_ihdr.biHeight) << 1); + image_row = (uint8*)malloc(row_bytes); + if (image && image_row) { + tmpimage = image; + pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1); + for (i = 0; i < bmp_ihdr.biHeight; i++) { + /* read in image */ + fseek(fp, pos, SEEK_SET); + fread(image_row, row_bytes, 1, fp); + /* expand 4bpp to 8bpp. stuff 4bit values into 8bit comps. 
*/ + for (j = 0; j < row_bytes; j++) { + tmpimage[j << 1] = image_row[j] & 0x0f; + tmpimage[(j << 1) + 1] = (image_row[j] & 0xf0) >> 4; + } + tmpimage += (row_bytes << 1); + pos -= row_bytes; + } + free(image_row); + } else { + if (image_row) free(image_row); + if (image) free(image); + image = NULL; + } + break; + case 24: + /* 24 bit bitmap */ + image = (uint8*)malloc((bmp_ihdr.biWidth * bmp_ihdr.biHeight) << 2); + image_row = (uint8*)malloc(row_bytes); + if (image && image_row) { + tmpimage = image; + pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1); + for (i = 0; i < bmp_ihdr.biHeight; i++) { + /* read in image */ + fseek(fp, pos, SEEK_SET); + fread(image_row, row_bytes, 1, fp); + /* convert 24bpp to 32bpp. */ + for (j = 0; j < bmp_ihdr.biWidth; j++) { + tmpimage[(j << 2)] = image_row[j * 3]; + tmpimage[(j << 2) + 1] = image_row[j * 3 + 1]; + tmpimage[(j << 2) + 2] = image_row[j * 3 + 2]; + tmpimage[(j << 2) + 3] = 0xFF; + } + tmpimage += (bmp_ihdr.biWidth << 2); + pos -= row_bytes; + } + free(image_row); + } else { + if (image_row) free(image_row); + if (image) free(image); + image = NULL; + } + } + + if (image) { + *width = (row_bytes << 3) / bmp_ihdr.biBitCount; + *height = bmp_ihdr.biHeight; + + switch (bmp_ihdr.biBitCount) { + case 8: + case 4: + *format = GR_TEXFMT_P_8; + break; + case 32: + case 24: + *format = GR_TEXFMT_ARGB_8888; + } + +#if POW2_TEXTURES + /* next power of 2 size conversions */ + /* NOTE: I can do this in the above loop for faster operations, but some + * texture packs require a workaround. see HACKALERT in nextPow2(). + */ + + TxReSample txReSample = new TxReSample; // XXX: temporary. move to a better place. 
+ +#if (POW2_TEXTURES == 2) + if (!txReSample->nextPow2(&image, width, height, 8, 1)) { +#else + if (!txReSample->nextPow2(&image, width, height, 8, 0)) { +#endif + if (image) { + free(image); + image = NULL; + } + *width = 0; + *height = 0; + *format = 0; + } + + delete txReSample; + +#endif /* POW2_TEXTURES */ + } + +#ifdef DEBUG + if (!image) { + DBG_INFO(80, L"Error: failed to load bmp image!\n"); + } +#endif + + return image; +} + +boolean +TxImage::getDDSInfo(FILE *fp, DDSFILEHEADER *dds_fhdr) +{ + /* + * read in DDSFILEHEADER + */ + + /* is this a DDS file? */ + if (fread(&dds_fhdr->dwMagic, 4, 1, fp) != 1) + return 0; + + if (memcmp(&dds_fhdr->dwMagic, "DDS ", 4) != 0) + return 0; + + if (fread(&dds_fhdr->dwSize, 4, 1, fp) != 1) + return 0; + + /* get file flags */ + if (fread(&dds_fhdr->dwFlags, 4, 1, fp) != 1) + return 0; + + /* height of dds in pixels */ + if (fread(&dds_fhdr->dwHeight, 4, 1, fp) != 1) + return 0; + + /* width of dds in pixels */ + if (fread(&dds_fhdr->dwWidth, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->dwLinearSize, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->dwDepth, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->dwMipMapCount, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->dwReserved1, 4 * 11, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwSize, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwFlags, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwFourCC, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwRGBBitCount, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwRBitMask, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwGBitMask, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwBBitMask, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->ddpf.dwRGBAlphaBitMask, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->dwCaps1, 4, 1, fp) != 1) + return 0; + + if (fread(&dds_fhdr->dwCaps2, 4, 1, fp) != 1) + return 0; + + return 1; 
+} + +uint8* +TxImage::readDDS(FILE* fp, int* width, int* height, uint16* format) +{ + uint8 *image = NULL; + DDSFILEHEADER dds_fhdr; + uint16 tmpformat = 0; + + /* initialize */ + *width = 0; + *height = 0; + *format = 0; + + /* check if we have a valid dds file */ + if (!fp) + return NULL; + + if (!getDDSInfo(fp, &dds_fhdr)) { + INFO(80, L"error reading dds file! dds image is corrupt.\n"); + return NULL; + } + + DBG_INFO(80, L"dds format %d x %d HeaderSize %d LinearSize %d\n", + dds_fhdr.dwWidth, dds_fhdr.dwHeight, dds_fhdr.dwSize, dds_fhdr.dwLinearSize); + + if (!(dds_fhdr.dwFlags & (DDSD_CAPS|DDSD_WIDTH|DDSD_HEIGHT|DDSD_PIXELFORMAT|DDSD_LINEARSIZE))) { + DBG_INFO(80, L"Error: incompatible dds format!\n"); + return NULL; + } + + if ((dds_fhdr.dwFlags & DDSD_MIPMAPCOUNT) && dds_fhdr.dwMipMapCount != 1) { + DBG_INFO(80, L"Error: mipmapped dds not supported!\n"); + return NULL; + } + + if (!((dds_fhdr.ddpf.dwFlags & DDPF_FOURCC) && dds_fhdr.dwCaps2 == 0)) { + DBG_INFO(80, L"Error: not fourcc standard texture!\n"); + return NULL; + } + + if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT1", 4) == 0) { + DBG_INFO(80, L"DXT1 format\n"); + /* compensate for missing LinearSize */ + dds_fhdr.dwLinearSize = (dds_fhdr.dwWidth * dds_fhdr.dwHeight) >> 1; + tmpformat = GR_TEXFMT_ARGB_CMP_DXT1; + } else if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT3", 4) == 0) { + DBG_INFO(80, L"DXT3 format\n"); + dds_fhdr.dwLinearSize = dds_fhdr.dwWidth * dds_fhdr.dwHeight; + tmpformat = GR_TEXFMT_ARGB_CMP_DXT3; + } else if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT5", 4) == 0) { + DBG_INFO(80, L"DXT5 format\n"); + dds_fhdr.dwLinearSize = dds_fhdr.dwWidth * dds_fhdr.dwHeight; + tmpformat = GR_TEXFMT_ARGB_CMP_DXT5; + } else { + DBG_INFO(80, L"Error: not DXT1 or DXT3 or DXT5 format!\n"); + return NULL; + } + + /* read in image */ + image = (uint8*)malloc(dds_fhdr.dwLinearSize); + if (image) { + *width = dds_fhdr.dwWidth; + *height = dds_fhdr.dwHeight; + *format = tmpformat; + + fseek(fp, 128, SEEK_SET); /* size of 
header is 128 bytes */ + fread(image, dds_fhdr.dwLinearSize, 1, fp); + } + + return image; +} diff --git a/Source/GlideHQ/TxImage.h b/Source/GlideHQ/TxImage.h new file mode 100644 index 000000000..64331f537 --- /dev/null +++ b/Source/GlideHQ/TxImage.h @@ -0,0 +1,116 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __TXIMAGE_H__ +#define __TXIMAGE_H__ + +#include +#include +#include "TxInternal.h" + +#ifndef WIN32 +typedef struct tagBITMAPFILEHEADER { + unsigned short bfType; + unsigned long bfSize; + unsigned short bfReserved1; + unsigned short bfReserved2; + unsigned long bfOffBits; +} BITMAPFILEHEADER; + +typedef struct tagBITMAPINFOHEADER { + unsigned long biSize; + long biWidth; + long biHeight; + unsigned short biPlanes; + unsigned short biBitCount; + unsigned long biCompression; + unsigned long biSizeImage; + long biXPelsPerMeter; + long biYPelsPerMeter; + unsigned long biClrUsed; + unsigned long biClrImportant; +} BITMAPINFOHEADER; +#else +typedef struct tagBITMAPFILEHEADER BITMAPFILEHEADER; +typedef struct tagBITMAPINFOHEADER BITMAPINFOHEADER; +#endif + +#define DDSD_CAPS 0x00000001 +#define DDSD_HEIGHT 0x00000002 +#define DDSD_WIDTH 0x00000004 +#define DDSD_PITCH 0x00000008 +#define DDSD_PIXELFORMAT 0x00001000 +#define DDSD_MIPMAPCOUNT 0x00020000 +#define DDSD_LINEARSIZE 0x00080000 +#define DDSD_DEPTH 0x00800000 + +#define DDPF_ALPHAPIXELS 0x00000001 +#define DDPF_FOURCC 0x00000004 +#define DDPF_RGB 0x00000040 + +#define DDSCAPS_COMPLEX 0x00000008 +#define DDSCAPS_TEXTURE 0x00001000 +#define DDSCAPS_MIPMAP 0x00400000 + +typedef struct tagDDSPIXELFORMAT { + unsigned long dwSize; + unsigned long dwFlags; + unsigned long dwFourCC; + unsigned long dwRGBBitCount; + unsigned long dwRBitMask; + unsigned long dwGBitMask; + unsigned long dwBBitMask; + unsigned long dwRGBAlphaBitMask; +} DDSPIXELFORMAT; + +typedef struct tagDDSFILEHEADER { + unsigned long dwMagic; + unsigned long dwSize; + unsigned long dwFlags; + unsigned long dwHeight; + unsigned long dwWidth; + unsigned long dwLinearSize; + unsigned long dwDepth; + unsigned long dwMipMapCount; + unsigned long dwReserved1[11]; + DDSPIXELFORMAT ddpf; + unsigned long dwCaps1; + unsigned long dwCaps2; +} DDSFILEHEADER; + +class TxImage +{ +private: + boolean getPNGInfo(FILE *fp, png_structp *png_ptr, png_infop 
*info_ptr); + boolean getBMPInfo(FILE *fp, BITMAPFILEHEADER *bmp_fhdr, BITMAPINFOHEADER *bmp_ihdr); + boolean getDDSInfo(FILE *fp, DDSFILEHEADER *dds_fhdr); +public: + TxImage() {} + ~TxImage() {} + uint8* readPNG(FILE* fp, int* width, int* height, uint16* format); + boolean writePNG(uint8* src, FILE* fp, int width, int height, int rowStride, uint16 format, uint8 *palette); + uint8* readBMP(FILE* fp, int* width, int* height, uint16* format); + uint8* readDDS(FILE* fp, int* width, int* height, uint16* format); +}; + +#endif /* __TXIMAGE_H__ */ diff --git a/Source/GlideHQ/TxInternal.h b/Source/GlideHQ/TxInternal.h new file mode 100644 index 000000000..3f0be6d9d --- /dev/null +++ b/Source/GlideHQ/TxInternal.h @@ -0,0 +1,100 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __INTERNAL_H__ +#define __INTERNAL_H__ + +#include "Ext_TxFilter.h" + +/* dll exports */ +#ifdef TXFILTER_DLL +#define TAPI __declspec(dllexport) +#define TAPIENTRY +#else +#define TAPI +#define TAPIENTRY +#endif + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned long uint32; + +#ifdef WIN32 +#define KBHIT(key) ((GetAsyncKeyState(key) & 0x8001) == 0x8001) +#else +#define KBHIT(key) (0) +#endif + +/* from OpenGL glext.h */ +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 +#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 + +/* for explicit fxt1 compression */ +#define CC_CHROMA 0x0 +#define CC_HI 0x1 +#define CC_ALPHA 0x2 + +/* in-memory zlib texture compression */ +#define GR_TEXFMT_GZ 0x8000 + +#if 0 /* this is here to remind me of other formats */ +/* from 3Dfx Interactive Inc. glide.h */ +#define GR_TEXFMT_8BIT 0x0 +#define GR_TEXFMT_RGB_332 GR_TEXFMT_8BIT +#define GR_TEXFMT_YIQ_422 0x1 +#define GR_TEXFMT_ALPHA_8 0x2 /* (0..0xFF) alpha */ +#define GR_TEXFMT_INTENSITY_8 0x3 /* (0..0xFF) intensity */ +#define GR_TEXFMT_ALPHA_INTENSITY_44 0x4 +#define GR_TEXFMT_P_8 0x5 /* 8-bit palette */ +#define GR_TEXFMT_RSVD0 0x6 /* GR_TEXFMT_P_8_RGBA */ +#define GR_TEXFMT_P_8_6666 GR_TEXFMT_RSVD0 +#define GR_TEXFMT_P_8_6666_EXT GR_TEXFMT_RSVD0 +#define GR_TEXFMT_RSVD1 0x7 +#define GR_TEXFMT_16BIT 0x8 +#define GR_TEXFMT_ARGB_8332 GR_TEXFMT_16BIT +#define GR_TEXFMT_AYIQ_8422 0x9 +#define GR_TEXFMT_RGB_565 0xa +#define GR_TEXFMT_ARGB_1555 0xb +#define GR_TEXFMT_ARGB_4444 0xc +#define GR_TEXFMT_ALPHA_INTENSITY_88 0xd +#define GR_TEXFMT_AP_88 0xe /* 8-bit alpha 8-bit palette */ +#define GR_TEXFMT_RSVD2 0xf +#define GR_TEXFMT_RSVD4 GR_TEXFMT_RSVD2 + +/* from 3Dfx Interactive Inc. 
g3ext.h */ +#define GR_TEXFMT_ARGB_CMP_FXT1 0x11 +#define GR_TEXFMT_ARGB_8888 0x12 +#define GR_TEXFMT_YUYV_422 0x13 +#define GR_TEXFMT_UYVY_422 0x14 +#define GR_TEXFMT_AYUV_444 0x15 +#define GR_TEXFMT_ARGB_CMP_DXT1 0x16 +#define GR_TEXFMT_ARGB_CMP_DXT2 0x17 +#define GR_TEXFMT_ARGB_CMP_DXT3 0x18 +#define GR_TEXFMT_ARGB_CMP_DXT4 0x19 +#define GR_TEXFMT_ARGB_CMP_DXT5 0x1A +#define GR_TEXTFMT_RGB_888 0xFF +#endif + +#endif /* __INTERNAL_H__ */ diff --git a/Source/GlideHQ/TxQuantize.cpp b/Source/GlideHQ/TxQuantize.cpp new file mode 100644 index 000000000..09100f4ef --- /dev/null +++ b/Source/GlideHQ/TxQuantize.cpp @@ -0,0 +1,2405 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef WIN32 +#pragma warning(disable: 4786) +#endif + +/* NOTE: The codes are not optimized. They can be made faster. */ + +#include "TxQuantize.h" + +TxQuantize::TxQuantize() +{ + _txUtil = new TxUtil(); + + /* get number of CPU cores. 
*/ + _numcore = _txUtil->getNumberofProcessors(); + + /* get dxtn extensions */ + _tx_compress_fxt1 = TxLoadLib::getInstance()->getfxtCompressTexFuncExt(); + _tx_compress_dxtn = TxLoadLib::getInstance()->getdxtCompressTexFuncExt(); +} + + +TxQuantize::~TxQuantize() +{ + delete _txUtil; +} + +void +TxQuantize::ARGB1555_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = (((*src & 0x00008000) ? 0xff000000 : 0x00000000) | + ((*src & 0x00007c00) << 9) | ((*src & 0x00007000) << 4) | + ((*src & 0x000003e0) << 6) | ((*src & 0x00000380) << 1) | + ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2)); + dest++; + *dest = (((*src & 0x80000000) ? 0xff000000 : 0x00000000) | + ((*src & 0x7c000000) >> 7) | ((*src & 0x70000000) >> 12) | + ((*src & 0x03e00000) >> 10) | ((*src & 0x03800000) >> 15) | + ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18)); + dest++; + src++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // arrr rrgg gggb bbbb + // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + mov edx, eax; // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb + mov ebx, 0x00000000; + and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000 + jz transparent1; + mov ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000 + + transparent1: + mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb + and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000 + shl edx, 4; // edx = 00000000 00000rrr rr000000 00000000 + or ebx, edx; // ebx = aaaaaaaa 00000rrr rr000000 00000000 + shl edx, 5; // edx = 00000000 rrrrr000 00000000 00000000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr rr000000 00000000 + and ebx, 0xffff0000; // ebx = aaaaaaaa rrrrrrrr 00000000 00000000 + mov edx, eax; + and 
edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000 + shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000 + shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000 + and ebx, 0xffffff00; // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000 + mov edx, eax; + and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb + shl edx, 3; // edx = 00000000 00000000 00000000 bbbbb000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000 + shr edx, 5; // edx = 00000000 00000000 00000000 00000bbb + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], ebx; + add edi, 4; + + shr eax, 16; // eax = 00000000 00000000 arrrrrgg gggbbbbb + mov edx, eax; // edx = 00000000 00000000 arrrrrgg gggbbbbb + mov ebx, 0x00000000; + and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000 + jz transparent2; + mov ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000 + + transparent2: + mov eax, edx; // eax = 00000000 00000000 arrrrrgg gggbbbbb + and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000 + shl edx, 4; // edx = 00000000 00000rrr rr000000 00000000 + or ebx, edx; // ebx = aaaaaaaa 00000rrr rr000000 00000000 + shl edx, 5; // edx = 00000000 rrrrr000 00000000 00000000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr rr000000 00000000 + and ebx, 0xffff0000; // ebx = aaaaaaaa rrrrrrrr 00000000 00000000 + mov edx, eax; + and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000 + shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000 + shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000 + and ebx, 0xffffff00; // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000 + mov edx, eax; + and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb + shl edx, 3; // edx = 00000000 
00000000 00000000 bbbbb000 + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000 + shr edx, 5; // edx = 00000000 00000000 00000000 00000bbb + or ebx, edx; // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], ebx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB4444_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = ((*src & 0x0000f000) << 12) | + ((*src & 0x00000f00) << 8) | + ((*src & 0x000000f0) << 4) | + (*src & 0x0000000f); + *dest |= (*dest << 4); + dest++; + *dest = ((*src & 0xf0000000) | + ((*src & 0x0f000000) >> 4) | + ((*src & 0x00f00000) >> 8) | + ((*src & 0x000f0000) >> 12)); + *dest |= (*dest >> 4); + dest++; + src++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaa rrrr gggg bbbb + // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + mov edx, eax; + and eax, 0x0000ffff; + mov ebx, eax; // 00000000 00000000 aaaarrrr ggggbbbb + and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000 + shl ebx, 12; // 0000aaaa 00000000 00000000 00000000 + or eax, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb + mov ebx, eax; + and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000 + shl ebx, 8; // 00000000 0000rrrr 00000000 00000000 + or eax, ebx; // 0000aaaa 0000rrrr aaaarrrr ggggbbbb + mov ebx, eax; + and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000 + shl ebx, 4; // 00000000 00000000 0000gggg 00000000 + and eax, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb + or eax, ebx; // 0000aaaa 0000rrrr 0000gggg 0000bbbb + mov ebx, eax; + shl ebx, 4; // aaaa0000 rrrr0000 gggg0000 bbbb0000 + or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 
4; + + shr edx, 16; + mov ebx, edx; // 00000000 00000000 aaaarrrr ggggbbbb + and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000 + shl ebx, 12; // 0000aaaa 00000000 00000000 00000000 + or edx, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb + mov ebx, edx; + and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000 + shl ebx, 8; // 00000000 0000rrrr 00000000 00000000 + or edx, ebx; // 0000aaaa 0000rrrr aaaarrrr ggggbbbb + mov ebx, edx; + and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000 + shl ebx, 4; // 00000000 00000000 0000gggg 00000000 + and edx, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb + or edx, ebx; // 0000aaaa 0000rrrr 0000gggg 0000bbbb + mov ebx, edx; + shl ebx, 4; // aaaa0000 rrrr0000 gggg0000 bbbb0000 + or edx, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::RGB565_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = (0xff000000 | + ((*src & 0x0000f800) << 8) | ((*src & 0x0000e000) << 3) | + ((*src & 0x000007e0) << 5) | ((*src & 0x00000600) >> 1) | + ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2)); + dest++; + *dest = (0xff000000 | + ((*src & 0xf8000000) >> 8) | ((*src & 0xe0000000) >> 13) | + ((*src & 0x07e00000) >> 11) | ((*src & 0x06000000) >> 17) | + ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18)); + dest++; + src++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // rrrr rggg gggb bbbb + // 11111111 rrrrrrrr gggggggg bbbbbbbb + mov edx, eax; + and eax, 0x0000ffff; + mov ebx, eax; // 00000000 00000000 rrrrrggg gggbbbbb + and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000 + 
shl ebx, 5; // 00000000 000rrrrr 00000000 00000000 + or eax, ebx; // 00000000 000rrrrr rrrrrggg gggbbbbb + mov ebx, eax; + and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000 + shl ebx, 5; // 00000000 00000000 gggggg00 00000000 + and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb + shl eax, 3; // 00000000 rrrrr000 00000000 bbbbb000 + or eax, ebx; // 00000000 rrrrr000 gggggg00 bbbbb000 + mov ebx, eax; + shr ebx, 5; // 00000000 00000rrr rr000ggg ggg00bbb + and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb + or eax, ebx; // 00000000 rrrrrrrr gggggg00 bbbbbbbb + mov ebx, eax; + shr ebx, 6; + and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000 + or eax, ebx // 00000000 rrrrrrrr gggggggg bbbbbbbb + or eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 4; + + shr edx, 16; + mov eax, edx; // 00000000 00000000 rrrrrggg gggbbbbb + and eax, 0x0000ffff; + mov ebx, eax; // 00000000 00000000 rrrrrggg gggbbbbb + and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000 + shl ebx, 5; // 00000000 000rrrrr 00000000 00000000 + or eax, ebx; // 00000000 000rrrrr rrrrrggg gggbbbbb + mov ebx, eax; + and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000 + shl ebx, 5; // 00000000 00000000 gggggg00 00000000 + and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb + shl eax, 3; // 00000000 rrrrr000 00000000 bbbbb000 + or eax, ebx; // 00000000 rrrrr000 gggggg00 bbbbb000 + mov ebx, eax; + shr ebx, 5; // 00000000 00000rrr rr000ggg ggg00bbb + and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb + or eax, ebx; // 00000000 rrrrrrrr gggggg00 bbbbbbbb + mov ebx, eax; + shr ebx, 6; + and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000 + or eax, ebx // 00000000 rrrrrrrr gggggggg bbbbbbbb + or eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void 
+TxQuantize::A8_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 2; + int i; + for (i = 0; i < siz; i++) { + *dest = (*src & 0x000000ff); + *dest |= (*dest << 8); + *dest |= (*dest << 16); + dest++; + *dest = (*src & 0x0000ff00); + *dest |= (*dest >> 8); + *dest |= (*dest << 16); + dest++; + *dest = (*src & 0x00ff0000); + *dest |= (*dest << 8); + *dest |= (*dest >> 16); + dest++; + *dest = (*src & 0xff000000); + *dest |= (*dest >> 8); + *dest |= (*dest >> 16); + dest++; + src++; + } +#else + int siz = (width * height) >> 2; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaaaaaa + // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + mov edx, eax; + and eax, 0x000000ff; + mov ebx, eax; // 00000000 00000000 00000000 aaaaaaaa + shl ebx, 8; // 00000000 00000000 aaaaaaaa 00000000 + or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa + mov ebx, eax; + shl ebx, 16; // aaaaaaaa aaaaaaaa 00000000 00000000 + or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0x0000ff00; + mov ebx, eax; // 00000000 00000000 aaaaaaaa 00000000 + shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa + or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa + mov ebx, eax; + shl ebx, 16; // aaaaaaaa aaaaaaaa 00000000 00000000 + or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0x00ff0000; + mov ebx, eax; // 00000000 aaaaaaaa 00000000 00000000 + shl ebx, 8; // aaaaaaaa 00000000 00000000 00000000 + or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000 + mov ebx, eax; + shr ebx, 16; // 00000000 00000000 aaaaaaaa aaaaaaaa + or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0xff000000; + mov ebx, eax; 
// aaaaaaaa 00000000 00000000 00000000 + shr ebx, 8; // 00000000 aaaaaaaa 00000000 00000000 + or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000 + mov ebx, eax; + shr ebx, 16; // 00000000 00000000 aaaaaaaa aaaaaaaa + or eax, ebx; // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb + + mov dword ptr [edi], eax; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::AI44_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 2; + int i; + for (i = 0; i < siz; i++) { + *dest = (*src & 0x0000000f); + *dest |= ((*dest << 8) | (*dest << 16)); + *dest |= ((*src & 0x000000f0) << 20); + *dest |= (*dest << 4); + dest++; + *dest = (*src & 0x00000f00); + *dest |= ((*dest << 8) | (*dest >> 8)); + *dest |= ((*src & 0x0000f000) << 12); + *dest |= (*dest << 4); + dest++; + *dest = (*src & 0x000f0000); + *dest |= ((*dest >> 8) | (*dest >> 16)); + *dest |= ((*src & 0x00f00000) << 4); + *dest |= (*dest << 4); + dest++; + *dest = ((*src & 0x0f000000) >> 4); + *dest |= ((*dest >> 8) | (*dest >> 16)); + *dest |= (*src & 0xf0000000); + *dest |= (*dest >> 4); + dest++; + src++; + } +#else + int siz = (width * height) >> 2; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaaiiii + // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + mov edx, eax; + and eax, 0x000000f0; // 00000000 00000000 00000000 aaaa0000 + mov ebx, edx; + shl eax, 20; // 0000aaaa 00000000 00000000 00000000 + and ebx, 0x0000000f; // 00000000 00000000 00000000 0000iiii + or eax, ebx; // 0000aaaa 00000000 00000000 0000iiii + shl ebx, 8; // 00000000 00000000 0000iiii 00000000 + or eax, ebx; // 0000aaaa 00000000 0000iiii 0000iiii + shl ebx, 8; // 00000000 0000iiii 00000000 00000000 + or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii + mov ebx, eax; + shl ebx, 4; // aaaa0000 iiii0000 iiii0000 
iiii0000 + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0x0000f000; // 00000000 00000000 aaaa0000 00000000 + mov ebx, edx; + shl eax, 12; // 0000aaaa 00000000 00000000 00000000 + and ebx, 0x00000f00; // 00000000 00000000 0000iiii 00000000 + or eax, ebx; // 0000aaaa 00000000 0000iiii 00000000 + shr ebx, 8; // 00000000 00000000 00000000 0000iiii + or eax, ebx; // 0000aaaa 00000000 0000iiii 0000iiii + shl ebx, 16; // 00000000 0000iiii 00000000 00000000 + or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii + mov ebx, eax; + shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000 + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0x00f00000; // 00000000 aaaa0000 00000000 00000000 + mov ebx, edx; + shl eax, 4; // 0000aaaa 00000000 00000000 00000000 + and ebx, 0x000f0000; // 00000000 0000iiii 00000000 00000000 + or eax, ebx; // 0000aaaa 0000iiii 00000000 00000000 + shr ebx, 8; // 00000000 00000000 0000iiii 00000000 + or eax, ebx; // 0000aaaa 0000iiii 0000iiii 00000000 + shr ebx, 8; // 00000000 00000000 00000000 0000iiii + or eax, ebx; // 0000aaaa 0000iiii 0000iiii 0000iiii + mov ebx, eax; + shl ebx, 4; // aaaa0000 iiii0000 iiii0000 iiii0000 + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0xf0000000; // aaaa0000 00000000 00000000 00000000 + mov ebx, edx; + and ebx, 0x0f000000; // 0000iiii 00000000 00000000 00000000 + shr ebx, 4; // 00000000 iiii0000 00000000 00000000 + or eax, ebx; // aaaa0000 iiii0000 00000000 00000000 + shr ebx, 8; // 00000000 00000000 iiii0000 00000000 + or eax, ebx; // aaaa0000 iiii0000 iiii0000 00000000 + shr ebx, 8; // 00000000 00000000 00000000 iiii0000 + or eax, ebx; // aaaa0000 iiii0000 iiii0000 iiii0000 + mov ebx, eax; + shr ebx, 4; // 0000aaaa 0000iiii 0000iiii 0000iiii + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii 
iiiiiiii + + mov dword ptr [edi], eax; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::AI88_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = (*src & 0x000000ff); + *dest |= ((*dest << 8) | (*dest << 16)); + *dest |= ((*src & 0x0000ff00) << 16); + dest++; + *dest = (*src & 0x00ff0000); + *dest |= ((*dest >> 8) | (*dest >> 16)); + *dest |= (*src & 0xff000000); + dest++; + src++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaaaaaa iiiiiiii + // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + mov edx, eax; + and eax, 0x0000ffff; // 00000000 00000000 aaaaaaaa iiiiiiii + mov ebx, eax; // 00000000 00000000 aaaaaaaa iiiiiiii + shl eax, 16; // aaaaaaaa iiiiiiii 00000000 00000000 + and ebx, 0x000000ff; // 00000000 00000000 00000000 iiiiiiii + or eax, ebx; // aaaaaaaa iiiiiiii 00000000 iiiiiiii + shl ebx, 8; // 00000000 00000000 iiiiiiii 00000000 + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0xffff0000; // aaaaaaaa iiiiiiii 00000000 00000000 + mov ebx, eax; // aaaaaaaa iiiiiiii 00000000 00000000 + and ebx, 0x00ff0000; // 00000000 iiiiiiii 00000000 00000000 + shr ebx, 8; // 00000000 00000000 iiiiiiii 00000000 + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii 00000000 + shr ebx, 8; // 00000000 00000000 00000000 iiiiiiii + or eax, ebx; // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii + + mov dword ptr [edi], eax; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB8888_ARGB1555(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i 
< siz; i++) { + *dest = ((*src & 0xff000000) ? 0x00008000 : 0x00000000); + *dest |= (((*src & 0x00f80000) >> 9) | + ((*src & 0x0000f800) >> 6) | + ((*src & 0x000000f8) >> 3)); + src++; + *dest |= ((*src & 0xff000000) ? 0x80000000 : 0x00000000); + *dest |= (((*src & 0x00f80000) << 7) | + ((*src & 0x0000f800) << 10) | + ((*src & 0x000000f8) << 13)); + src++; + dest++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + +#if 1 + mov edx, eax; + and eax, 0xff000000; // aaaa0000 00000000 00000000 00000000 + jz transparent1; + mov eax, 0x00008000; // 00000000 00000000 a0000000 00000000 + + transparent1: + mov ebx, edx; + and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000 + shr ebx, 9; // 00000000 00000000 0rrrrr00 00000000 + or eax, ebx; // 00000000 00000000 arrrrr00 00000000 + mov ebx, edx; + and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000 + shr ebx, 6; // 00000000 00000000 000000gg ggg00000 + or eax, ebx; // 00000000 00000000 arrrrrgg ggg00000 + and edx, 0x000000f8; // 00000000 00000000 00000000 bbbbb000 + shr edx, 3; // 00000000 00000000 00000000 000bbbbb + or edx, eax; // 00000000 00000000 arrrrrgg gggbbbbb + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and eax, 0xff000000; // aaaa0000 00000000 00000000 00000000 + jz transparent2; + or edx, 0x80000000; // a0000000 00000000 arrrrrgg gggbbbbb + + transparent2: + mov eax, ebx; + and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000 + shl ebx, 7; // 0rrrrr00 00000000 00000000 00000000 + or edx, ebx; // arrrrr00 00000000 arrrrrgg gggbbbbb + mov ebx, eax; + and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000 + shl ebx, 10; // 000000gg ggg00000 00000000 00000000 + or edx, ebx; // arrrrrgg ggg00000 arrrrrgg gggbbbbb + and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000 + shl eax, 
13; // 00000000 000bbbbb 00000000 00000000 + or edx, eax; // arrrrrgg gggbbbbb arrrrrgg gggbbbbb + + mov dword ptr [edi], edx; + add edi, 4; +#else + mov edx, eax; + and edx, 0x01000000; // 0000000a 00000000 00000000 00000000 + shr edx, 9; // 00000000 00000000 a0000000 00000000 + mov ebx, eax; + and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000 + shr ebx, 9; // 00000000 00000000 0rrrrr00 00000000 + or edx, ebx; // 00000000 00000000 arrrrr00 00000000 + mov ebx, eax; + and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000 + shr ebx, 6; // 00000000 00000000 000000gg ggg00000 + or edx, ebx; // 00000000 00000000 arrrrrgg ggg00000 + and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000 + shr eax, 3; // 00000000 00000000 00000000 000bbbbb + or edx, eax; // 00000000 00000000 arrrrrgg gggbbbbb + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and ebx, 0x80000000; // a0000000 00000000 00000000 00000000 + or edx, ebx; // a0000000 00000000 arrrrrgg gggbbbbb + mov ebx, eax; + and ebx, 0x00f80000; // 00000000 rrrrr000 00000000 00000000 + shl ebx, 7; // 0rrrrr00 00000000 00000000 00000000 + or edx, ebx; // arrrrr00 00000000 arrrrrgg gggbbbbb + mov ebx, eax; + and ebx, 0x0000f800; // 00000000 00000000 ggggg000 00000000 + shl ebx, 10; // 000000gg ggg00000 00000000 00000000 + or edx, ebx; // arrrrrgg ggg00000 arrrrrgg gggbbbbb + and eax, 0x000000f8; // 00000000 00000000 00000000 bbbbb000 + shl eax, 13; // 00000000 000bbbbb 00000000 00000000 + or edx, eax; // arrrrrgg gggbbbbb arrrrrgg gggbbbbb + + mov dword ptr [edi], edx; + add edi, 4; +#endif + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB8888_ARGB4444(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = (((*src & 0xf0000000) >> 16) | + ((*src & 0x00f00000) >> 12) | + ((*src & 0x0000f000) >> 8) | + ((*src & 0x000000f0) >> 4)); + src++; + *dest |= 
((*src & 0xf0000000) | + ((*src & 0x00f00000) << 4) | + ((*src & 0x0000f000) << 8) | + ((*src & 0x000000f0) << 12)); + src++; + dest++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + mov edx, eax; + and edx, 0xf0000000; // aaaa0000 00000000 00000000 00000000 + shr edx, 16; // 00000000 00000000 aaaa0000 00000000 + mov ebx, eax; + and ebx, 0x00f00000; // 00000000 rrrr0000 00000000 00000000 + shr ebx, 12; // 00000000 00000000 0000rrrr 00000000 + or edx, ebx; // 00000000 00000000 aaaarrrr 00000000 + mov ebx, eax; + and ebx, 0x0000f000; // 00000000 00000000 gggg0000 00000000 + shr ebx, 8; // 00000000 00000000 00000000 gggg0000 + or edx, ebx; // 00000000 00000000 aaaarrrr gggg0000 + and eax, 0x000000f0; // 00000000 00000000 00000000 bbbb0000 + shr eax, 4; // 00000000 00000000 00000000 0000bbbb + or edx, eax; // 00000000 00000000 aaaarrrr ggggbbbb + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and ebx, 0xf0000000; // aaaa0000 00000000 00000000 00000000 + or edx, ebx; // aaaa0000 00000000 aaaarrrr ggggbbbb + mov ebx, eax; + and ebx, 0x00f00000; // 00000000 rrrr0000 00000000 00000000 + shl ebx, 4; // 0000rrrr 00000000 00000000 00000000 + or edx, ebx; // aaaarrrr 00000000 aaaarrrr ggggbbbb + mov ebx, eax; + and ebx, 0x0000f000; // 00000000 00000000 gggg0000 00000000 + shl ebx, 8; // 00000000 gggg0000 00000000 00000000 + or edx, ebx; // aaaarrrr gggg0000 aaaarrrr ggggbbbb + and eax, 0x000000f0; // 00000000 00000000 00000000 bbbb0000 + shl eax, 12; // 00000000 0000bbbb 00000000 00000000 + or edx, eax; // arrrrrgg ggggbbbb aaaarrrr ggggbbbb + + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB8888_RGB565(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = 
(width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = (((*src & 0x000000f8) >> 3) | + ((*src & 0x0000fc00) >> 5) | + ((*src & 0x00f80000) >> 8)); + src++; + *dest |= (((*src & 0x000000f8) << 13) | + ((*src & 0x0000fc00) << 11) | + ((*src & 0x00f80000) << 8)); + src++; + dest++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + mov edx, eax; + and edx, 0x000000F8; // 00000000 00000000 00000000 bbbbb000 + shr edx, 3; // 00000000 00000000 00000000 000bbbbb + mov ebx, eax; + and ebx, 0x0000FC00; // 00000000 00000000 gggggg00 00000000 + shr ebx, 5; // 00000000 00000000 00000ggg ggg00000 + or edx, ebx; // 00000000 00000000 00000ggg gggbbbbb + mov ebx, eax; + and ebx, 0x00F80000; // 00000000 rrrrr000 00000000 00000000 + shr ebx, 8; // 00000000 00000000 rrrrr000 00000000 + or edx, ebx; // 00000000 00000000 rrrrrggg gggbbbbb + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and ebx, 0x000000F8; // 00000000 00000000 00000000 bbbbb000 + shl ebx, 13; // 00000000 000bbbbb 00000000 00000000 + or edx, ebx; // 00000000 000bbbbb rrrrrggg gggbbbbb + mov ebx, eax; + and ebx, 0x0000FC00; // 00000000 00000000 gggggg00 00000000 + shl ebx, 11; // 00000ggg ggg00000 00000000 00000000 + or edx, ebx; // 00000ggg gggbbbbb rrrrrggg gggbbbbb + mov ebx, eax; + and ebx, 0x00F80000; // 00000000 rrrrr000 00000000 00000000 + shl ebx, 8; // rrrrr000 00000000 00000000 00000000 + or edx, ebx; // rrrrrggg gggbbbbb rrrrrggg gggbbbbb + + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB8888_A8(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 2; + int i; + for (i = 0; i < siz; i++) { + *dest = (*src & 0x0000ff00) >> 8; + src++; + *dest |= (*src & 
0x0000ff00); + src++; + *dest |= ((*src & 0x0000ff00) << 8); + src++; + *dest |= ((*src & 0x0000ff00) << 16); + src++; + dest++; + } +#else + int siz = (width * height) >> 2; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + +#if 0 + mov edx, eax; // we'll use A comp for every pixel + and edx, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + shr edx, 24; // 00000000 00000000 00000000 aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + shr eax, 16; // 00000000 00000000 aaaaaaaa 00000000 + or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + shr eax, 8; // 00000000 aaaaaaaa 00000000 00000000 + or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa +#endif + +#if 1 + mov edx, eax; // we'll use G comp for every pixel + and edx, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000 + shr edx, 8; // 00000000 00000000 00000000 aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000 + or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000 + shl eax, 8; // 00000000 aaaaaaaa 00000000 00000000 + or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000 + shl eax, 16; // aaaaaaaa 00000000 00000000 00000000 + or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa +#endif + +#if 0 + mov edx, eax; + and edx, 0x000000FF; // 00000000 00000000 00000000 
aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0x0000FF00; // 00000000 00000000 aaaaaaaa 00000000 + or edx, eax; // 00000000 00000000 aaaaaaaa aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0x00FF0000; // 00000000 aaaaaaaa 00000000 00000000 + or edx, eax; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + or edx, eax; // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa +#endif + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB8888_AI44(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 2; + int i; + for (i = 0; i < siz; i++) { + *dest = (((*src & 0xf0000000) >> 24) | ((*src & 0x0000f000) >> 12)); + src++; + *dest |= (((*src & 0xf0000000) >> 16) | ((*src & 0x0000f000) >> 4)); + src++; + *dest |= (((*src & 0xf0000000) >> 8) | ((*src & 0x0000f000) << 4)); + src++; + *dest |= ((*src & 0xf0000000) | ((*src & 0x0000f000) << 12)); + src++; + dest++; + } +#else + int siz = (width * height) >> 2; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + mov edx, eax; // use A and G comps MSB + and edx, 0xF0000000; // aaaa0000 00000000 00000000 00000000 + mov ebx, eax; + shr edx, 24; // 00000000 00000000 00000000 aaaa0000 + and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000 + shr ebx, 12; // 00000000 00000000 00000000 0000iiii + or edx, ebx; // 00000000 00000000 00000000 aaaaiiii + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000 + shr eax, 16; // 00000000 00000000 aaaa0000 00000000 + and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000 + shr ebx, 4; // 00000000 00000000 0000iiii 00000000 + or eax, 
ebx; // 00000000 00000000 aaaaiiii 00000000 + or edx, eax; // 00000000 00000000 aaaaiiii aaaaiiii + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000 + shr eax, 8; // 00000000 aaaa0000 00000000 00000000 + and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000 + shl ebx, 4; // 00000000 0000iiii 00000000 00000000 + or eax, ebx; // 00000000 aaaaiiii 00000000 00000000 + or edx, eax; // 00000000 aaaaiiii aaaaiiii aaaaiiii + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and eax, 0xF0000000; // aaaa0000 00000000 00000000 00000000 + and ebx, 0x0000F000; // 00000000 00000000 iiii0000 00000000 + shl ebx, 12; // 0000iiii 00000000 00000000 00000000 + or eax, ebx; // aaaaiiii 00000000 00000000 00000000 + or edx, eax; // aaaaiiii aaaaiiii aaaaiiii aaaaiiii + + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +void +TxQuantize::ARGB8888_AI88(uint32* src, uint32* dest, int width, int height) +{ +#if 1 + int siz = (width * height) >> 1; + int i; + for (i = 0; i < siz; i++) { + *dest = (((*src & 0xff000000) >> 16) | ((*src & 0x0000ff00) >> 8)); + src++; + *dest |= ((*src & 0xff000000) | ((*src & 0x0000ff00) << 8)); + src++; + dest++; + } +#else + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + mov edx, eax; + and edx, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + mov ebx, eax; + shr edx, 16; // 00000000 00000000 aaaaaaaa 00000000 + and ebx, 0x0000FF00; // 00000000 00000000 iiiiiiii 00000000 + shr ebx, 8; // 00000000 00000000 00000000 iiiiiiii + or edx, ebx; // 00000000 00000000 aaaaaaaa iiiiiiii + + mov eax, dword ptr [esi]; + add esi, 4; + + mov ebx, eax; + and eax, 0xFF000000; // aaaaaaaa 00000000 00000000 00000000 + and ebx, 
0x0000FF00; // 00000000 00000000 iiiiiiii 00000000 + shl ebx, 8; // 00000000 iiiiiiii 00000000 00000000 + or eax, ebx; // aaaaaaaa iiiiiiii 00000000 00000000 + or edx, eax; // aaaaaaaa iiiiiiii aaaaaaaa iiiiiiii + + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +/* R.W. Floyd and L. Steinberg, An adaptive algorithm + * for spatial grey scale, Proceedings of the Society + * of Information Display 17, pp75-77, 1976 + */ +void +TxQuantize::ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height) +{ + /* Floyd-Steinberg error-diffusion halftoning */ + + int i, x, y; + int qr, qg, qb; /* quantized incoming values */ + int ir, ig, ib; /* incoming values */ + int t; + int *errR = new int[width]; + int *errG = new int[width]; + int *errB = new int[width]; + + uint16 *dest = (uint16 *)dst; + + for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + /* incoming pixel values */ + ir = ((*src >> 16) & 0xFF) * 10000; + ig = ((*src >> 8) & 0xFF) * 10000; + ib = ((*src ) & 0xFF) * 10000; + + if (x == 0) qr = qg = qb = 0; + + /* quantize pixel values. 
+ * qr * 0.4375 is the error from the pixel to the left, + * errR is the error from the pixel to the top, top left, and top right */ + /* qr * 0.4375 is the error distribution to the EAST in + * the previous loop */ + ir += errR[x] + qr * 4375 / 10000; + ig += errG[x] + qg * 4375 / 10000; + ib += errB[x] + qb * 4375 / 10000; + + /* error distribution to the SOUTH-EAST in the previous loop + * can't calculate in the previous loop because it steps on + * the above quantization */ + errR[x] = qr * 625 / 10000; + errG[x] = qg * 625 / 10000; + errB[x] = qb * 625 / 10000; + + qr = ir; + qg = ig; + qb = ib; + + /* clamp */ + if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000; + if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000; + if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000; + + /* convert to RGB565 */ + qr = qr * 0x1F / 2550000; + qg = qg * 0x3F / 2550000; + qb = qb * 0x1F / 2550000; + + /* this is the dithered pixel */ + t = (qr << 11) | (qg << 5) | qb; + + /* compute the errors */ + qr = ((qr << 3) | (qr >> 2)) * 10000; + qg = ((qg << 2) | (qg >> 4)) * 10000; + qb = ((qb << 3) | (qb >> 2)) * 10000; + qr = ir - qr; + qg = ig - qg; + qb = ib - qb; + + /* compute the error distributions */ + /* Floyd-Steinberg filter + * 7/16 (=0.4375) to the EAST + * 5/16 (=0.3125) to the SOUTH + * 1/16 (=0.0625) to the SOUTH-EAST + * 3/16 (=0.1875) to the SOUTH-WEST + * + * x 7/16 + * 3/16 5/16 1/16 + */ + /* SOUTH-WEST */ + if (x > 1) { + errR[x - 1] += qr * 1875 / 10000; + errG[x - 1] += qg * 1875 / 10000; + errB[x - 1] += qb * 1875 / 10000; + } + + /* SOUTH */ + errR[x] += qr * 3125 / 10000; + errG[x] += qg * 3125 / 10000; + errB[x] += qb * 3125 / 10000; + + *dest = (t & 0xFFFF); + + dest++; + src++; + } + } + + delete [] errR; + delete [] errG; + delete [] errB; +} + + +void +TxQuantize::ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height) +{ + /* Floyd-Steinberg error-diffusion halftoning */ + + int i, x, y; + int qr, qg, qb; /* quantized 
incoming values */ + int ir, ig, ib; /* incoming values */ + int t; + int *errR = new int[width]; + int *errG = new int[width]; + int *errB = new int[width]; + + uint16 *dest = (uint16 *)dst; + + for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + /* incoming pixel values */ + ir = ((*src >> 16) & 0xFF) * 10000; + ig = ((*src >> 8) & 0xFF) * 10000; + ib = ((*src ) & 0xFF) * 10000; + + if (x == 0) qr = qg = qb = 0; + + /* quantize pixel values. + * qr * 0.4375 is the error from the pixel to the left, + * errR is the error from the pixel to the top, top left, and top right */ + /* qr * 0.4375 is the error distribution to the EAST in + * the previous loop */ + ir += errR[x] + qr * 4375 / 10000; + ig += errG[x] + qg * 4375 / 10000; + ib += errB[x] + qb * 4375 / 10000; + + /* error distribution to the SOUTH-EAST of the previous loop. + * cannot calculate in the previous loop because it steps on + * the above quantization */ + errR[x] = qr * 625 / 10000; + errG[x] = qg * 625 / 10000; + errB[x] = qb * 625 / 10000; + + qr = ir; + qg = ig; + qb = ib; + + /* clamp */ + if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000; + if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000; + if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000; + + /* convert to RGB555 */ + qr = qr * 0x1F / 2550000; + qg = qg * 0x1F / 2550000; + qb = qb * 0x1F / 2550000; + + /* this is the dithered pixel */ + t = (qr << 10) | (qg << 5) | qb; + t |= ((*src >> 24) ? 
0x8000 : 0); + + /* compute the errors */ + qr = ((qr << 3) | (qr >> 2)) * 10000; + qg = ((qg << 3) | (qg >> 2)) * 10000; + qb = ((qb << 3) | (qb >> 2)) * 10000; + qr = ir - qr; + qg = ig - qg; + qb = ib - qb; + + /* compute the error distributions */ + /* Floyd-Steinberg filter + * 7/16 (=0.4375) to the EAST + * 5/16 (=0.3125) to the SOUTH + * 1/16 (=0.0625) to the SOUTH-EAST + * 3/16 (=0.1875) to the SOUTH-WEST + * + * x 7/16 + * 3/16 5/16 1/16 + */ + /* SOUTH-WEST */ + if (x > 1) { + errR[x - 1] += qr * 1875 / 10000; + errG[x - 1] += qg * 1875 / 10000; + errB[x - 1] += qb * 1875 / 10000; + } + + /* SOUTH */ + errR[x] += qr * 3125 / 10000; + errG[x] += qg * 3125 / 10000; + errB[x] += qb * 3125 / 10000; + + *dest = (t & 0xFFFF); + + dest++; + src++; + } + } + + delete [] errR; + delete [] errG; + delete [] errB; +} + +void +TxQuantize::ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height) +{ + /* Floyd-Steinberg error-diffusion halftoning */ + + /* NOTE: alpha dithering looks better for alpha gradients, but are prone + * to producing noisy speckles for constant or step level alpha. Output + * results should always be checked. + */ + boolean ditherAlpha = 0; + + int i, x, y; + int qr, qg, qb, qa; /* quantized incoming values */ + int ir, ig, ib, ia; /* incoming values */ + int t; + int *errR = new int[width]; + int *errG = new int[width]; + int *errB = new int[width]; + int *errA = new int[width]; + + uint16 *dest = (uint16 *)dst; + + for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = errA[i] = 0; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + /* incoming pixel values */ + ir = ((*src >> 16) & 0xFF) * 10000; + ig = ((*src >> 8) & 0xFF) * 10000; + ib = ((*src ) & 0xFF) * 10000; + ia = ((*src >> 24) & 0xFF) * 10000; + + if (x == 0) qr = qg = qb = qa = 0; + + /* quantize pixel values. 
+ * qr * 0.4375 is the error from the pixel to the left, + * errR is the error from the pixel to the top, top left, and top right */ + /* qr * 0.4375 is the error distribution to the EAST in + * the previous loop */ + ir += errR[x] + qr * 4375 / 10000; + ig += errG[x] + qg * 4375 / 10000; + ib += errB[x] + qb * 4375 / 10000; + ia += errA[x] + qa * 4375 / 10000; + + /* error distribution to the SOUTH-EAST of the previous loop. + * cannot calculate in the previous loop because it steps on + * the above quantization */ + errR[x] = qr * 625 / 10000; + errG[x] = qg * 625 / 10000; + errB[x] = qb * 625 / 10000; + errA[x] = qa * 625 / 10000; + + qr = ir; + qg = ig; + qb = ib; + qa = ia; + + /* clamp */ + if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000; + if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000; + if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000; + if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000; + + /* convert to RGB444 */ + qr = qr * 0xF / 2550000; + qg = qg * 0xF / 2550000; + qb = qb * 0xF / 2550000; + qa = qa * 0xF / 2550000; + + /* this is the value to be returned */ + if (ditherAlpha) { + t = (qa << 12) | (qr << 8) | (qg << 4) | qb; + } else { + t = (qr << 8) | (qg << 4) | qb; + t |= (*src >> 16) & 0xF000; + } + + /* compute the errors */ + qr = ((qr << 4) | qr) * 10000; + qg = ((qg << 4) | qg) * 10000; + qb = ((qb << 4) | qb) * 10000; + qa = ((qa << 4) | qa) * 10000; + qr = ir - qr; + qg = ig - qg; + qb = ib - qb; + qa = ia - qa; + + /* compute the error distributions */ + /* Floyd-Steinberg filter + * 7/16 (=0.4375) to the EAST + * 5/16 (=0.3125) to the SOUTH + * 1/16 (=0.0625) to the SOUTH-EAST + * 3/16 (=0.1875) to the SOUTH-WEST + * + * x 7/16 + * 3/16 5/16 1/16 + */ + /* SOUTH-WEST */ + if (x > 1) { + errR[x - 1] += qr * 1875 / 10000; + errG[x - 1] += qg * 1875 / 10000; + errB[x - 1] += qb * 1875 / 10000; + errA[x - 1] += qa * 1875 / 10000; + } + + /* SOUTH */ + errR[x] += qr * 3125 / 10000; + errG[x] += qg * 3125 / 10000; 
+ errB[x] += qb * 3125 / 10000; + errA[x] += qa * 3125 / 10000; + + *dest = (t & 0xFFFF); + + dest++; + src++; + } + } + + delete [] errR; + delete [] errG; + delete [] errB; + delete [] errA; +} + +void +TxQuantize::ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height) +{ + /* Floyd-Steinberg error-diffusion halftoning */ + + /* NOTE: alpha dithering looks better for alpha gradients, but are prone + * to producing noisy speckles for constant or step level alpha. Output + * results should always be checked. + */ + boolean ditherAlpha = 0; + + int i, x, y; + int qi, qa; /* quantized incoming values */ + int ii, ia; /* incoming values */ + int t; + int *errI = new int[width]; + int *errA = new int[width]; + + uint8 *dest = (uint8 *)dst; + + for (i = 0; i < width; i++) errI[i] = errA[i] = 0; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114 */ + ii = ((*src >> 16) & 0xFF) * 2990 + + ((*src >> 8) & 0xFF) * 5870 + + ((*src ) & 0xFF) * 1140; + ia = ((*src >> 24) & 0xFF) * 10000; + + if (x == 0) qi = qa = 0; + + /* quantize pixel values. + * qi * 0.4375 is the error from the pixel to the left, + * errI is the error from the pixel to the top, top left, and top right */ + /* qi * 0.4375 is the error distrtibution to the EAST in + * the previous loop */ + ii += errI[x] + qi * 4375 / 10000; + ia += errA[x] + qa * 4375 / 10000; + + /* error distribution to the SOUTH-EAST in the previous loop. 
+ * cannot calculate in the previous loop because it steps on + * the above quantization */ + errI[x] = qi * 625 / 10000; + errA[x] = qa * 625 / 10000; + + qi = ii; + qa = ia; + + /* clamp */ + if (qi < 0) qi = 0; else if (qi > 2550000) qi = 2550000; + if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000; + + /* convert to I4 */ + qi = qi * 0xF / 2550000; + qa = qa * 0xF / 2550000; + + /* this is the value to be returned */ + if (ditherAlpha) { + t = (qa << 4) | qi; + } else { + t = qi; + t |= ((*src >> 24) & 0xF0); + } + + /* compute the errors */ + qi = ((qi << 4) | qi) * 10000; + qa = ((qa << 4) | qa) * 10000; + qi = ii - qi; + qa = ia - qa; + + /* compute the error distributions */ + /* Floyd-Steinberg filter + * 7/16 (=0.4375) to the EAST + * 5/16 (=0.3125) to the SOUTH + * 1/16 (=0.0625) to the SOUTH-EAST + * 3/16 (=0.1875) to the SOUTH-WEST + * + * x 7/16 + * 3/16 5/16 1/16 + */ + /* SOUTH-WEST */ + if (x > 1) { + errI[x - 1] += qi * 1875 / 10000; + errA[x - 1] += qa * 1875 / 10000; + } + + /* SOUTH */ + errI[x] += qi * 3125 / 10000; + errA[x] += qa * 3125 / 10000; + + *dest = t & 0xFF; + + dest++; + src++; + } + } + + delete [] errI; + delete [] errA; +} + +void +TxQuantize::ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height) +{ + int x, y; + uint16 *dest = (uint16 *)dst; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { +#if 1 + /* libpng style grayscale conversion. + * Reduce RGB files to grayscale with or without alpha + * using the equation given in Poynton's ColorFAQ at + * + * Copyright (c) 1998-01-04 Charles Poynton poynton at inforamp.net + * + * Y = 0.212671 * R + 0.715160 * G + 0.072169 * B + * + * We approximate this with + * + * Y = 0.21268 * R + 0.7151 * G + 0.07217 * B + * + * which can be expressed with integers as + * + * Y = (6969 * R + 23434 * G + 2365 * B)/32768 + * + * The calculation is to be done in a linear colorspace. 
+ */ + *dest = (((int)((((*src >> 16) & 0xFF) * 6969 + + ((*src >> 8) & 0xFF) * 23434 + + ((*src ) & 0xFF) * 2365) / 32768) & 0xFF) | + (uint16)((*src >> 16) & 0xFF00)); +#else + /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114 + * this is same as the standard NTSC gray scale conversion. */ + *dest = (((int)((((*src >> 16) & 0xFF) * 299 + + ((*src >> 8) & 0xFF) * 587 + + ((*src ) & 0xFF) * 114) / 1000) & 0xFF) | + (uint16)((*src >> 16) & 0xFF00)); +#endif + dest++; + src++; + } + } +} + +void +TxQuantize::ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height) +{ + int x, y; + uint8 *dest = (uint8 *)dst; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { +#if 1 + /* libpng style Intensity = (6969 * R + 23434 * G + 2365 * B)/32768 */ + *dest = (int)((((*src >> 16) & 0xFF) * 6969 + + ((*src >> 8) & 0xFF) * 23434 + + ((*src ) & 0xFF) * 2365) / 32768) & 0xFF; +#else + /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114 + * this is same as the standard NTSC gray scale conversion. */ + *dest = (int)((((*src >>16) & 0xFF) * 299 + + ((*src >> 8) & 0xFF) * 587 + + ((*src ) & 0xFF) * 114) / 1000) & 0xFF; +#endif + dest++; + src++; + } + } +} + +void +TxQuantize::P8_16BPP(uint32* src, uint32* dest, int width, int height, uint32* palette) +{ + /* passed in palette is RGBA5551 format */ +#if 1 + int i; + int size = width * height; + for (i = 0; i < size; i++) { + ((uint16*)dest)[i] = ((uint16*)palette)[(int)(((uint8*)src)[i])]; + ((uint16*)dest)[i] = ((((uint16*)dest)[i] << 15) | (((uint16*)dest)[i] >> 1)); + } +#else + + /* not finished yet... 
*/ + + int siz = (width * height) >> 2; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + mov edx, dword ptr [palette]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +#endif +} + +boolean +TxQuantize::quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer) +{ + typedef void (TxQuantize::*quantizerFunc)(uint32* src, uint32* dest, int width, int height); + quantizerFunc quantizer; + int bpp_shift = 0; + + if (destformat == GR_TEXFMT_ARGB_8888) { + switch (srcformat) { + case GR_TEXFMT_ARGB_1555: + quantizer = &TxQuantize::ARGB1555_ARGB8888; + bpp_shift = 1; + break; + case GR_TEXFMT_ARGB_4444: + quantizer = &TxQuantize::ARGB4444_ARGB8888; + bpp_shift = 1; + break; + case GR_TEXFMT_RGB_565: + quantizer = &TxQuantize::RGB565_ARGB8888; + bpp_shift = 1; + break; + case GR_TEXFMT_ALPHA_8: + quantizer = &TxQuantize::A8_ARGB8888; + bpp_shift = 2; + break; + case GR_TEXFMT_ALPHA_INTENSITY_44: + quantizer = &TxQuantize::AI44_ARGB8888; + bpp_shift = 2; + break; + case GR_TEXFMT_ALPHA_INTENSITY_88: + quantizer = &TxQuantize::AI88_ARGB8888; + bpp_shift = 1; + break; + default: + return 0; + } + + unsigned int numcore = _numcore; + unsigned int blkrow = 0; + while (numcore > 1 && blkrow == 0) { + blkrow = (height >> 2) / numcore; + numcore--; + } + numcore = 1; + if (blkrow > 0 && numcore > 1) { +_asm int 3 +#ifdef tofix + boost::thread *thrd[MAX_NUMCORE]; + unsigned int i; + int blkheight = blkrow << 2; + unsigned int srcStride = (width * blkheight) << (2 - bpp_shift); + unsigned int destStride = srcStride << bpp_shift; + for (i = 0; i < numcore - 1; i++) { + thrd[i] = new boost::thread(boost::bind(quantizer, + this, + (uint32*)src, + (uint32*)dest, + width, + blkheight)); + src += srcStride; + dest += destStride; + } + thrd[i] = new 
boost::thread(boost::bind(quantizer, + this, + (uint32*)src, + (uint32*)dest, + width, + height - blkheight * i)); + for (i = 0; i < numcore; i++) { + thrd[i]->join(); + delete thrd[i]; + } +#endif + } else { + (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height); + } + + } else if (srcformat == GR_TEXFMT_ARGB_8888) { + switch (destformat) { + case GR_TEXFMT_ARGB_1555: + quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB1555 : &TxQuantize::ARGB8888_ARGB1555_ErrD; + bpp_shift = 1; + break; + case GR_TEXFMT_ARGB_4444: + quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB4444 : &TxQuantize::ARGB8888_ARGB4444_ErrD; + bpp_shift = 1; + break; + case GR_TEXFMT_RGB_565: + quantizer = fastQuantizer ? &TxQuantize::ARGB8888_RGB565 : &TxQuantize::ARGB8888_RGB565_ErrD; + bpp_shift = 1; + break; + case GR_TEXFMT_ALPHA_8: + case GR_TEXFMT_INTENSITY_8: + quantizer = fastQuantizer ? &TxQuantize::ARGB8888_A8 : &TxQuantize::ARGB8888_I8_Slow; + bpp_shift = 2; + break; + case GR_TEXFMT_ALPHA_INTENSITY_44: + quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI44 : &TxQuantize::ARGB8888_AI44_ErrD; + bpp_shift = 2; + break; + case GR_TEXFMT_ALPHA_INTENSITY_88: + quantizer = fastQuantizer ? 
&TxQuantize::ARGB8888_AI88 : &TxQuantize::ARGB8888_AI88_Slow; + bpp_shift = 1; + break; + default: + return 0; + } + + unsigned int numcore = _numcore; + unsigned int blkrow = 0; + while (numcore > 1 && blkrow == 0) { + blkrow = (height >> 2) / numcore; + numcore--; + } + if (blkrow > 0 && numcore > 1) { +_asm int 3 +#ifdef tofix + boost::thread *thrd[MAX_NUMCORE]; + unsigned int i; + int blkheight = blkrow << 2; + unsigned int srcStride = (width * blkheight) << 2; + unsigned int destStride = srcStride >> bpp_shift; + for (i = 0; i < numcore - 1; i++) { + thrd[i] = new boost::thread(boost::bind(quantizer, + this, + (uint32*)src, + (uint32*)dest, + width, + blkheight)); + src += srcStride; + dest += destStride; + } + thrd[i] = new boost::thread(boost::bind(quantizer, + this, + (uint32*)src, + (uint32*)dest, + width, + height - blkheight * i)); + for (i = 0; i < numcore; i++) { + thrd[i]->join(); + delete thrd[i]; + } +#endif + } else { + (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height); + } + + } else { + return 0; + } + + return 1; +} + +boolean +TxQuantize::FXT1(uint8 *src, uint8 *dest, + int srcwidth, int srcheight, uint16 srcformat, + int *destwidth, int *destheight, uint16 *destformat) +{ + /* + * NOTE: src must be in ARGB8888 format, srcformat describes + * the closest 16bbp representation of src. + * + * NOTE: I have modified the dxtn library to use ARGB format + * which originaly was ABGR format. 
+ */ + + boolean bRet = 0; + + if (_tx_compress_fxt1 && + srcwidth >= 8 && srcheight >= 4) { + /* compress to fxt1 + * width and height must be larger than 8 and 4 respectively + */ + int dstRowStride = ((srcwidth + 7) & ~7) << 1; + int srcRowStride = (srcwidth << 2); + + unsigned int numcore = _numcore; + unsigned int blkrow = 0; + while (numcore > 1 && blkrow == 0) { + blkrow = (srcheight >> 2) / numcore; + numcore--; + } + if (blkrow > 0 && numcore > 1) { +_asm int 3 +#ifdef tofix + boost::thread *thrd[MAX_NUMCORE]; + unsigned int i; + int blkheight = blkrow << 2; + unsigned int srcStride = (srcwidth * blkheight) << 2; + unsigned int destStride = dstRowStride * blkrow; + for (i = 0; i < numcore - 1; i++) { + thrd[i] = new boost::thread(boost::bind(_tx_compress_fxt1, + srcwidth, + blkheight, + 4, + src, + srcRowStride, + dest, + dstRowStride)); + src += srcStride; + dest += destStride; + } + thrd[i] = new boost::thread(boost::bind(_tx_compress_fxt1, + srcwidth, + srcheight - blkheight * i, + 4, + src, + srcRowStride, + dest, + dstRowStride)); + for (i = 0; i < numcore; i++) { + thrd[i]->join(); + delete thrd[i]; + } +#endif + } else { + (*_tx_compress_fxt1)(srcwidth, /* width */ + srcheight, /* height */ + 4, /* comps: ARGB8888=4, RGB888=3 */ + src, /* source */ + srcRowStride, /* width*comps */ + dest, /* destination */ + dstRowStride); /* 16 bytes per 8x4 texel */ + } + + /* dxtn adjusts width and height to M8 and M4 respectively by replication */ + *destwidth = (srcwidth + 7) & ~7; + *destheight = (srcheight + 3) & ~3; + *destformat = GR_TEXFMT_ARGB_CMP_FXT1; + + bRet = 1; + } + + return bRet; +} + +boolean +TxQuantize::DXTn(uint8 *src, uint8 *dest, + int srcwidth, int srcheight, uint16 srcformat, + int *destwidth, int *destheight, uint16 *destformat) +{ + /* + * NOTE: src must be in ARGB8888 format, srcformat describes + * the closest 16bbp representation of src. 
+ * + * NOTE: I have modified the dxtn library to use ARGB format + * which originaly was ABGR format. + */ + + boolean bRet = 0; + + if (_tx_compress_dxtn && + srcwidth >= 4 && srcheight >= 4) { + /* compress to dxtn + * width and height must be larger than 4 + */ + + /* skip formats that DXTn won't help in size. */ + if (srcformat == GR_TEXFMT_ALPHA_8 || + srcformat == GR_TEXFMT_ALPHA_INTENSITY_44) { + ; /* shutup compiler */ + } else { + int dstRowStride = ((srcwidth + 3) & ~3) << 2; + int compression = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; + + *destformat = GR_TEXFMT_ARGB_CMP_DXT5; + +#if !GLIDE64_DXTN + /* okay... we are going to disable DXT1 with 1bit alpha + * for Glide64. some textures have all 0 alpha values. + * see "N64 Kobe Bryant in NBA Courtside" + */ + if (srcformat == GR_TEXFMT_ARGB_1555) { + dstRowStride >>= 1; + compression = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; + *destformat = GR_TEXFMT_ARGB_CMP_DXT1; + } else +#endif + if (srcformat == GR_TEXFMT_RGB_565 || + srcformat == GR_TEXFMT_INTENSITY_8) { + dstRowStride >>= 1; + compression = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; + *destformat = GR_TEXFMT_ARGB_CMP_DXT1; + } + + unsigned int numcore = _numcore; + unsigned int blkrow = 0; + while (numcore > 1 && blkrow == 0) { + blkrow = (srcheight >> 2) / numcore; + numcore--; + } + if (blkrow > 0 && numcore > 1) { +_asm int 3 +#ifdef tofix + boost::thread *thrd[MAX_NUMCORE]; + unsigned int i; + int blkheight = blkrow << 2; + unsigned int srcStride = (srcwidth * blkheight) << 2; + unsigned int destStride = dstRowStride * blkrow; + for (i = 0; i < numcore - 1; i++) { + thrd[i] = new boost::thread(boost::bind(_tx_compress_dxtn, + 4, + srcwidth, + blkheight, + src, + compression, + dest, + dstRowStride)); + src += srcStride; + dest += destStride; + } + thrd[i] = new boost::thread(boost::bind(_tx_compress_dxtn, + 4, + srcwidth, + srcheight - blkheight * i, + src, + compression, + dest, + dstRowStride)); + for (i = 0; i < numcore; i++) { + thrd[i]->join(); + delete 
thrd[i]; + } +#endif + } else { + (*_tx_compress_dxtn)(4, /* comps: ARGB8888=4, RGB888=3 */ + srcwidth, /* width */ + srcheight, /* height */ + src, /* source */ + compression, /* format */ + dest, /* destination */ + dstRowStride); /* DXT1 = 8 bytes per 4x4 texel + * others = 16 bytes per 4x4 texel */ + } + + /* dxtn adjusts width and height to M4 by replication */ + *destwidth = (srcwidth + 3) & ~3; + *destheight = (srcheight + 3) & ~3; + + bRet = 1; + } + } + + return bRet; +} + +boolean +TxQuantize::compress(uint8 *src, uint8 *dest, + int srcwidth, int srcheight, uint16 srcformat, + int *destwidth, int *destheight, uint16 *destformat, + int compressionType) +{ + boolean bRet = 0; + + switch (compressionType) { + case FXT1_COMPRESSION: + bRet = FXT1(src, dest, + srcwidth, srcheight, srcformat, + destwidth, destheight, destformat); + break; + case S3TC_COMPRESSION: + bRet = DXTn(src, dest, + srcwidth, srcheight, srcformat, + destwidth, destheight, destformat); + break; + case NCC_COMPRESSION: + /* TODO: narrow channel compression */ + ; + } + + return bRet; +} + +#if 0 /* unused */ +void +TxQuantize::I8_ARGB8888(uint32* src, uint32* dest, int width, int height) +{ + int siz = (width * height) >> 2; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaaaaaa + // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa + mov edx, eax; + and eax, 0x000000ff; + mov ebx, eax; // 00000000 00000000 00000000 aaaaaaaa + shl ebx, 8; // 00000000 00000000 aaaaaaaa 00000000 + or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa + shl ebx, 8; // 00000000 aaaaaaaa 00000000 00000000 + or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0x0000ff00; + mov ebx, eax; // 00000000 00000000 aaaaaaaa 00000000 + shr ebx, 8; // 
00000000 00000000 00000000 aaaaaaaa + or eax, ebx; // 00000000 00000000 aaaaaaaa aaaaaaaa + shl ebx, 16; // 00000000 aaaaaaaa 00000000 00000000 + or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0x00ff0000; + mov ebx, eax; // 00000000 aaaaaaaa 00000000 00000000 + shr ebx, 8; // 00000000 00000000 aaaaaaaa 00000000 + or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa 00000000 + shr ebx, 8; // 00000000 00000000 00000000 aaaaaaaa + or eax, ebx; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa + + mov dword ptr [edi], eax; + add edi, 4; + + mov eax, edx; + and eax, 0xff000000; + mov ebx, eax; // aaaaaaaa 00000000 00000000 00000000 + shr ebx, 8; // 00000000 aaaaaaaa 00000000 00000000 + or eax, ebx; // aaaaaaaa aaaaaaaa 00000000 00000000 + shr ebx, 8; // 00000000 00000000 aaaaaaaa 00000000 + or eax, ebx; // aaaaaaaa aaaaaaaa aaaaaaaa 00000000 + shr eax, 8; // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa + or eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa + + mov dword ptr [edi], eax; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +} + +void +TxQuantize::ARGB8888_I8(uint32* src, uint32* dest, int width, int height) +{ + ARGB8888_A8(src, dest, width, height); +} + +void +TxQuantize::ARGB1555_ABGR8888(uint32* src, uint32* dest, int width, int height) +{ + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // arrr rrgg gggb bbbb + // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + mov edx, eax; // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb + and ebx, 0x00000000; + and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000 + jz transparent1; + or ebx, 0xff000000; // ebx = aaaaaaaa 00000000 
00000000 00000000 + + transparent1: + mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb + and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb + shl edx, 14; // edx = 00000000 00000bbb bb000000 00000000 + or ebx, edx; // ebx = aaaaaaaa 00000bbb bb000000 00000000 + shl edx, 5; // edx = 00000000 bbbbb000 00000000 00000000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb bb000000 00000000 + and ebx, 0xffff0000; // ebx = aaaaaaaa bbbbbbbb 00000000 00000000 + mov edx, eax; + and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000 + shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000 + shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000 + and ebx, 0xffffff00; // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000 + mov edx, eax; + and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000 + shr edx, 7; // edx = 00000000 00000000 00000000 rrrrr000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000 + shr edx, 5; // edx = 00000000 00000000 00000000 00000rrr + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + + mov dword ptr [edi], ebx; + add edi, 4; + + shr eax, 16; // eax = 00000000 00000000 arrrrrgg gggbbbbb + mov edx, eax; // edx = 00000000 00000000 arrrrrgg gggbbbbb + and ebx, 0x00000000; + and eax, 0x00008000; // eax = 00000000 00000000 a0000000 00000000 + jz transparent2; + or ebx, 0xff000000; // ebx = aaaaaaaa 00000000 00000000 00000000 + + transparent2: + mov eax, edx; // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb + and edx, 0x0000001f; // edx = 00000000 00000000 00000000 000bbbbb + shl edx, 14; // edx = 00000000 00000bbb bb000000 00000000 + or ebx, edx; // ebx = aaaaaaaa 00000bbb bb000000 00000000 + shl edx, 5; // edx = 00000000 bbbbb000 00000000 00000000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb bb000000 00000000 + and ebx, 0xffff0000; // ebx = aaaaaaaa bbbbbbbb 00000000 00000000 + mov edx, eax; + 
and edx, 0x000003e0; // edx = 00000000 00000000 000000gg ggg00000 + shl edx, 1; // edx = 00000000 00000000 00000ggg gg000000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000 + shl edx, 5; // edx = 00000000 00000000 ggggg000 00000000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000 + and ebx, 0xffffff00; // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000 + mov edx, eax; + and edx, 0x00007c00; // edx = 00000000 00000000 0rrrrr00 00000000 + shr edx, 7; // edx = 00000000 00000000 00000000 rrrrr000 + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000 + shr edx, 5; // edx = 00000000 00000000 00000000 00000rrr + or ebx, edx; // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + + mov dword ptr [edi], ebx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +} + +void +TxQuantize::ARGB4444_ABGR8888(uint32* src, uint32* dest, int width, int height) +{ + int siz = (width * height) >> 1; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaa rrrr gggg bbbb + // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + mov edx, eax; + and eax, 0x0000ffff; + mov ebx, eax; // 00000000 00000000 aaaarrrr ggggbbbb + and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000 + shl ebx, 12; // 0000aaaa 00000000 00000000 00000000 + or eax, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb + mov ebx, eax; + and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb + shl ebx, 16; // 00000000 0000bbbb 00000000 00000000 + or eax, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggbbbb + mov ebx, eax; + and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000 + shr ebx, 8; // 00000000 00000000 00000000 0000rrrr + and eax, 0xfffffff0; + or eax, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggrrrr + mov ebx, eax; + and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000 + shl ebx, 4; // 00000000 00000000 0000gggg 00000000 + and eax, 
0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr + or eax, ebx; // 0000aaaa 0000bbbb 0000gggg 0000rrrr + mov ebx, eax; + shl ebx, 4; // aaaa0000 bbbb0000 gggg0000 rrrr0000 + or eax, ebx; // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + + mov dword ptr [edi], eax; + + add edi, 4; + + shr edx, 16; + mov ebx, edx; // 00000000 00000000 aaaarrrr ggggbbbb + and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000 + shl ebx, 12; // 0000aaaa 00000000 00000000 00000000 + or edx, ebx; // 0000aaaa 00000000 aaaarrrr ggggbbbb + mov ebx, edx; + and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb + shl ebx, 16; // 00000000 0000bbbb 00000000 00000000 + or edx, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggbbbb + mov ebx, edx; + and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000 + shr ebx, 8; // 00000000 00000000 00000000 0000rrrr + and edx, 0xfffffff0; + or edx, ebx; // 0000aaaa 0000bbbb aaaarrrr ggggrrrr + mov ebx, edx; + and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000 + shl ebx, 4; // 00000000 00000000 0000gggg 00000000 + and edx, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr + or edx, ebx; // 0000aaaa 0000bbbb 0000gggg 0000rrrr + mov ebx, edx; + shl ebx, 4; // aaaa0000 bbbb0000 gggg0000 rrrr0000 + or edx, ebx; // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + + mov dword ptr [edi], edx; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +} + +void +TxQuantize::ARGB8888_ABGR8888(uint32* src, uint32* dest, int width, int height) +{ + int siz = width * height; + + __asm { + push ebx; + push esi; + push edi; + + mov esi, dword ptr [src]; + mov edi, dword ptr [dest]; + mov ecx, dword ptr [siz]; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr + mov edx, eax; + bswap edx; + shr edx, 8; + and eax, 0xff000000; + + or eax, edx; + + mov dword ptr [edi], eax; + add edi, 4; + + dec ecx; + jnz tc1_loop; + + pop edi; + pop esi; + pop ebx; + } +} +#endif diff --git a/Source/GlideHQ/TxQuantize.h 
b/Source/GlideHQ/TxQuantize.h new file mode 100644 index 000000000..d3c6ae6dc --- /dev/null +++ b/Source/GlideHQ/TxQuantize.h @@ -0,0 +1,99 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __TXQUANTIZE_H__ +#define __TXQUANTIZE_H__ + +/* Glide64 DXTn workaround + * (0:disable, 1:enable) */ +#define GLIDE64_DXTN 1 + +#include "TxInternal.h" +#include "TxUtil.h" + +class TxQuantize +{ +private: + TxUtil *_txUtil; + int _numcore; + + fxtCompressTexFuncExt _tx_compress_fxt1; + dxtCompressTexFuncExt _tx_compress_dxtn; + + /* fast optimized... well, sort of. 
*/ + void ARGB1555_ARGB8888(uint32* src, uint32* dst, int width, int height); + void ARGB4444_ARGB8888(uint32* src, uint32* dst, int width, int height); + void RGB565_ARGB8888(uint32* src, uint32* dst, int width, int height); + void A8_ARGB8888(uint32* src, uint32* dst, int width, int height); + void AI44_ARGB8888(uint32* src, uint32* dst, int width, int height); + void AI88_ARGB8888(uint32* src, uint32* dst, int width, int height); + + void ARGB8888_ARGB1555(uint32* src, uint32* dst, int width, int height); + void ARGB8888_ARGB4444(uint32* src, uint32* dst, int width, int height); + void ARGB8888_RGB565(uint32* src, uint32* dst, int width, int height); + void ARGB8888_A8(uint32* src, uint32* dst, int width, int height); + void ARGB8888_AI44(uint32* src, uint32* dst, int width, int height); + void ARGB8888_AI88(uint32* src, uint32* dst, int width, int height); + + /* quality */ + void ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height); + void ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height); + void ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height); + void ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height); + void ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height); + void ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height); + + /* compressors */ + boolean FXT1(uint8 *src, uint8 *dest, + int srcwidth, int srcheight, uint16 srcformat, + int *destwidth, int *destheight, uint16 *destformat); + boolean DXTn(uint8 *src, uint8 *dest, + int srcwidth, int srcheight, uint16 srcformat, + int *destwidth, int *destheight, uint16 *destformat); + +public: + TxQuantize(); + ~TxQuantize(); + + /* others */ + void P8_16BPP(uint32* src, uint32* dst, int width, int height, uint32* palette); + + boolean quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer = 1); + + boolean compress(uint8 *src, uint8 *dest, + int 
srcwidth, int srcheight, uint16 srcformat, + int *destwidth, int *destheight, uint16 *destformat, + int compressionType); + + +#if 0 /* unused */ + void ARGB8888_I8(uint32* src, uint32* dst, int width, int height); + void I8_ARGB8888(uint32* src, uint32* dst, int width, int height); + + void ARGB1555_ABGR8888(uint32* src, uint32* dst, int width, int height); + void ARGB4444_ABGR8888(uint32* src, uint32* dst, int width, int height); + void ARGB8888_ABGR8888(uint32* src, uint32* dst, int width, int height); +#endif +}; + +#endif /* __TXQUANTIZE_H__ */ diff --git a/Source/GlideHQ/TxReSample.cpp b/Source/GlideHQ/TxReSample.cpp new file mode 100644 index 000000000..138428b77 --- /dev/null +++ b/Source/GlideHQ/TxReSample.cpp @@ -0,0 +1,417 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "TxReSample.h" +#include "TxDbg.h" +#include +#include + +#define _USE_MATH_DEFINES +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +int +TxReSample::nextPow2(int num) +{ + num = num - 1; + num = num | (num >> 1); + num = num | (num >> 2); + num = num | (num >> 4); + num = num | (num >> 8); + num = num | (num >> 16); + /*num = num | (num >> 32);*//* for 64bit architecture */ + num = num + 1; + + return num; +} + +boolean +TxReSample::nextPow2(uint8** image, int* width, int* height, int bpp, boolean use_3dfx = 0) +{ + /* NOTE: bpp must be one of the follwing: 8, 16, 24, 32 bits per pixel */ + + if (!*image || !*width || !*height || !bpp) + return 0; + + int row_bytes = ((*width * bpp) >> 3); + int o_row_bytes = row_bytes; + int o_width = *width; + int n_width = *width; + int o_height = *height; + int n_height = *height; + + /* HACKALERT: I have explicitly subtracted (n) from width/height to + * adjust textures that have (n) pixel larger width/height than + * power of 2 size. This is a dirty hack for textures that have + * munged aspect ratio by (n) pixel to the original. + */ + if (n_width > 64) n_width -= 4; + else if (n_width > 16) n_width -= 2; + else if (n_width > 4) n_width -= 1; + + if (n_height > 64) n_height -= 4; + else if (n_height > 16) n_height -= 2; + else if (n_height > 4) n_height -= 1; + + n_width = nextPow2(n_width); + n_height = nextPow2(n_height); + row_bytes = (n_width * bpp) >> 3; + + /* 3dfx Glide3 format, W:H aspect ratio range (8:1 - 1:8) */ + if (use_3dfx) { + if (n_width > n_height) { + if (n_width > (n_height << 3)) + n_height = n_width >> 3; + } else { + if (n_height > (n_width << 3)) { + n_width = n_height >> 3; + row_bytes = (n_width * bpp) >> 3; + } + } + DBG_INFO(80, L"using 3dfx W:H aspect ratio range (8:1 - 1:8).\n"); + } + + /* do we really need to do this ? 
*/ + if (o_width == n_width && o_height == n_height) + return 1; /* nope */ + + DBG_INFO(80, L"expand image to next power of 2 dimensions. %d x %d -> %d x %d\n", + o_width, o_height, n_width, n_height); + + if (o_width > n_width) + o_width = n_width; + + if (o_height > n_height) + o_height = n_height; + + /* allocate memory to read in image */ + uint8 *pow2image = (uint8*)malloc(row_bytes * n_height); + + /* read in image */ + if (pow2image) { + int i, j; + uint8 *tmpimage = *image, *tmppow2image = pow2image; + + for (i = 0; i < o_height; i++) { + /* copy row */ + memcpy(tmppow2image, tmpimage, ((o_width * bpp) >> 3)); + + /* expand to pow2 size by replication */ + for(j = ((o_width * bpp) >> 3); j < row_bytes; j++) + tmppow2image[j] = tmppow2image[j - (bpp >> 3)]; + + tmppow2image += row_bytes; + tmpimage += o_row_bytes; + } + /* expand to pow2 size by replication */ + for (i = o_height; i < n_height; i++) + memcpy(&pow2image[row_bytes * i], &pow2image[row_bytes * (i - 1)], row_bytes); + + free(*image); + + *image = pow2image; + *height = n_height; + *width = n_width; + + return 1; + } + + return 0; +} + +/* Ken Turkowski + * Filters for Common Resampling Tasks + * Apple Computer 1990 + */ +double +TxReSample::tent(double x) +{ + if (x < 0.0) x = -x; + if (x < 1.0) return (1.0 - x); + return 0.0; +} + +double +TxReSample::gaussian(double x) +{ + if (x < 0) x = -x; + if (x < 2.0) return pow(2.0, -2.0 * x * x); + return 0.0; +} + +double +TxReSample::sinc(double x) +{ + if (x == 0) return 1.0; + x *= M_PI; + return (sin(x) / x); +} + +double +TxReSample::lanczos3(double x) +{ + if (x < 0) x = -x; + if (x < 3.0) return (sinc(x) * sinc(x/3.0)); + return 0.0; +} + +/* Don P. Mitchell and Arun N. 
Netravali + * Reconstruction Filters in Computer Graphics + * SIGGRAPH '88 + * Proceedings of the 15th annual conference on Computer + * graphics and interactive techniques, pp221-228, 1988 + */ +double +TxReSample::mitchell(double x) +{ + if (x < 0) x = -x; + if (x < 2.0) { + const double B = 1.0 / 3.0; + const double C = 1.0 / 3.0; + if (x < 1.0) { + x = (((12.0 - 9.0 * B - 6.0 * C) * (x * x * x)) + + ((-18.0 + 12.0 * B + 6.0 * C) * (x * x)) + + (6.0 - 2.0 * B)); + } else { + x = (((-1.0 * B - 6.0 * C) * (x * x * x)) + + ((6.0 * B + 30.0 * C) * (x * x)) + + ((-12.0 * B - 48.0 * C) * x) + + (8.0 * B + 24.0 * C)); + } + return (x / 6.0); + } + return 0.0; +} + +/* J. F. Kaiser and W. A. Reed + * Data smoothing using low-pass digital filters + * Rev. Sci. instrum. 48 (11), pp1447-1457, 1977 + */ +double +TxReSample::besselI0(double x) +{ + /* zero-order modified bessel function of the first kind */ + const double eps_coeff = 1E-16; /* small enough */ + double xh, sum, pow, ds; + xh = 0.5 * x; + sum = 1.0; + pow = 1.0; + ds = 1.0; + int k = 0; + while (ds > sum * eps_coeff) { + k++; + pow *= (xh / k); + ds = pow * pow; + sum = sum + ds; + } + return sum; +} + +double +TxReSample::kaiser(double x) +{ + const double alpha = 4.0; + const double half_window = 5.0; + const double ratio = x / half_window; + return sinc(x) * besselI0(alpha * sqrt(1 - ratio * ratio)) / besselI0(alpha); +} + +boolean +TxReSample::minify(uint8 **src, int *width, int *height, int ratio) +{ + /* NOTE: src must be ARGB8888, ratio is the inverse representation */ + +#if 0 + if (!*src || ratio < 2) return 0; + + /* Box filtering. + * It would be nice to do Kaiser filtering. + * N64 uses narrow strip textures which makes it hard to filter effectively. 
+ */ + + int x, y, x2, y2, offset, numtexel; + uint32 A, R, G, B, texel; + + int tmpwidth = *width / ratio; + int tmpheight = *height / ratio; + + uint8 *tmptex = (uint8*)malloc((tmpwidth * tmpheight) << 2); + + if (tmptex) { + numtexel = ratio * ratio; + for (y = 0; y < tmpheight; y++) { + offset = ratio * y * *width; + for (x = 0; x < tmpwidth; x++) { + A = R = G = B = 0; + for (y2 = 0; y2 < ratio; y2++) { + for (x2 = 0; x2 < ratio; x2++) { + texel = ((uint32*)*src)[offset + *width * y2 + x2]; + A += (texel >> 24); + R += ((texel >> 16) & 0x000000ff); + G += ((texel >> 8) & 0x000000ff); + B += (texel & 0x000000ff); + } + } + A = (A + ratio) / numtexel; + R = (R + ratio) / numtexel; + G = (G + ratio) / numtexel; + B = (B + ratio) / numtexel; + ((uint32*)tmptex)[y * tmpwidth + x] = ((A << 24) | (R << 16) | (G << 8) | B); + offset += ratio; + } + } + free(*src); + *src = tmptex; + *width = tmpwidth; + *height = tmpheight; + + DBG_INFO(80, L"minification ratio:%d -> %d x %d\n", ratio, *width, *height); + + return 1; + } + + DBG_INFO(80, L"Error: failed minification!\n"); + + return 0; + +#else + + if (!*src || ratio < 2) return 0; + + /* Image Resampling */ + + /* half width of filter window. + * NOTE: must be 1.0 or larger. + * + * kaiser-bessel 5, lanczos3 3, mitchell 2, gaussian 1.5, tent 1 + */ + double half_window = 5.0; + + int x, y, x2, y2, z; + double A, R, G, B; + uint32 texel; + + int tmpwidth = *width / ratio; + int tmpheight = *height / ratio; + + /* resampled destination */ + uint8 *tmptex = (uint8*)malloc((tmpwidth * tmpheight) << 2); + if (!tmptex) return 0; + + /* work buffer. single row */ + uint8 *workbuf = (uint8*)malloc(*width << 2); + if (!workbuf) { + free(tmptex); + return 0; + } + + /* prepare filter lookup table. only half width required for symetric filters. 
*/ + double *weight = (double*)malloc((int)((half_window * ratio) * sizeof(double))); + if (!weight) { + free(tmptex); + free(workbuf); + return 0; + } + for (x = 0; x < half_window * ratio; x++) { + //weight[x] = tent((double)x / ratio) / ratio; + //weight[x] = gaussian((double)x / ratio) / ratio; + //weight[x] = lanczos3((double)x / ratio) / ratio; + //weight[x] = mitchell((double)x / ratio) / ratio; + weight[x] = kaiser((double)x / ratio) / ratio; + } + + /* linear convolution */ + for (y = 0; y < tmpheight; y++) { + for (x = 0; x < *width; x++) { + texel = ((uint32*)*src)[y * ratio * *width + x]; + A = (double)(texel >> 24) * weight[0]; + R = (double)((texel >> 16) & 0xff) * weight[0]; + G = (double)((texel >> 8) & 0xff) * weight[0]; + B = (double)((texel ) & 0xff) * weight[0]; + for (y2 = 1; y2 < half_window * ratio; y2++) { + z = y * ratio + y2; + if (z >= *height) z = *height - 1; + texel = ((uint32*)*src)[z * *width + x]; + A += (double)(texel >> 24) * weight[y2]; + R += (double)((texel >> 16) & 0xff) * weight[y2]; + G += (double)((texel >> 8) & 0xff) * weight[y2]; + B += (double)((texel ) & 0xff) * weight[y2]; + z = y * ratio - y2; + if (z < 0) z = 0; + texel = ((uint32*)*src)[z * *width + x]; + A += (double)(texel >> 24) * weight[y2]; + R += (double)((texel >> 16) & 0xff) * weight[y2]; + G += (double)((texel >> 8) & 0xff) * weight[y2]; + B += (double)((texel ) & 0xff) * weight[y2]; + } + if (A < 0) A = 0; else if (A > 255) A = 255; + if (R < 0) R = 0; else if (R > 255) R = 255; + if (G < 0) G = 0; else if (G > 255) G = 255; + if (B < 0) B = 0; else if (B > 255) B = 255; + ((uint32*)workbuf)[x] = (((uint32)A << 24) | ((uint32)R << 16) | ((uint32)G << 8) | (uint32)B); + } + for (x = 0; x < tmpwidth; x++) { + texel = ((uint32*)workbuf)[x * ratio]; + A = (double)(texel >> 24) * weight[0]; + R = (double)((texel >> 16) & 0xff) * weight[0]; + G = (double)((texel >> 8) & 0xff) * weight[0]; + B = (double)((texel ) & 0xff) * weight[0]; + for (x2 = 1; x2 < 
half_window * ratio; x2++) { + z = x * ratio + x2; + if (z >= *width) z = *width - 1; + texel = ((uint32*)workbuf)[z]; + A += (double)(texel >> 24) * weight[x2]; + R += (double)((texel >> 16) & 0xff) * weight[x2]; + G += (double)((texel >> 8) & 0xff) * weight[x2]; + B += (double)((texel ) & 0xff) * weight[x2]; + z = x * ratio - x2; + if (z < 0) z = 0; + texel = ((uint32*)workbuf)[z]; + A += (double)(texel >> 24) * weight[x2]; + R += (double)((texel >> 16) & 0xff) * weight[x2]; + G += (double)((texel >> 8) & 0xff) * weight[x2]; + B += (double)((texel ) & 0xff) * weight[x2]; + } + if (A < 0) A = 0; else if (A > 255) A = 255; + if (R < 0) R = 0; else if (R > 255) R = 255; + if (G < 0) G = 0; else if (G > 255) G = 255; + if (B < 0) B = 0; else if (B > 255) B = 255; + ((uint32*)tmptex)[y * tmpwidth + x] = (((uint32)A << 24) | ((uint32)R << 16) | ((uint32)G << 8) | (uint32)B); + } + } + + free(*src); + *src = tmptex; + free(weight); + free(workbuf); + *width = tmpwidth; + *height = tmpheight; + + DBG_INFO(80, L"minification ratio:%d -> %d x %d\n", ratio, *width, *height); + + return 1; +#endif +} diff --git a/Source/GlideHQ/TxReSample.h b/Source/GlideHQ/TxReSample.h new file mode 100644 index 000000000..805647d29 --- /dev/null +++ b/Source/GlideHQ/TxReSample.h @@ -0,0 +1,45 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __TXRESAMPLE_H__ +#define __TXRESAMPLE_H__ + +#include "TxInternal.h" + +class TxReSample +{ +private: + double tent(double x); + double gaussian(double x); + double sinc(double x); + double lanczos3(double x); + double mitchell(double x); + double besselI0(double x); + double kaiser(double x); +public: + boolean minify(uint8 **src, int *width, int *height, int ratio); + boolean nextPow2(uint8** image, int* width, int* height, int bpp, boolean use_3dfx); + int nextPow2(int num); +}; + +#endif /* __TXRESAMPLE_H__ */ diff --git a/Source/GlideHQ/TxTexCache.cpp b/Source/GlideHQ/TxTexCache.cpp new file mode 100644 index 000000000..68e526402 --- /dev/null +++ b/Source/GlideHQ/TxTexCache.cpp @@ -0,0 +1,80 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifdef WIN32 +#pragma warning(disable: 4786) +#endif + +/* dump cache to disk (0:disable, 1:enable) */ +#define DUMP_CACHE 1 + +#include "TxTexCache.h" +#include "TxDbg.h" +#include +#include +#include + +TxTexCache::~TxTexCache() +{ +#if DUMP_CACHE + if (_options & DUMP_TEXCACHE) { + /* dump cache to disk */ + std::wstring filename = _ident + L"_MEMORYCACHE.dat"; + CPath cachepath(stdstr().FromUTF16(_path.c_str()).c_str(),""); + cachepath.AppendDirectory("cache"); + + int config = _options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESS_TEX|COMPRESSION_MASK|FORCE16BPP_TEX|GZ_TEXCACHE); + + TxCache::save(stdstr(cachepath).ToUTF16().c_str(), filename.c_str(), config); + } +#endif +} + +TxTexCache::TxTexCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident, + dispInfoFuncExt callback + ) : TxCache((options & ~GZ_HIRESTEXCACHE), cachesize, path, ident, callback) +{ + /* assert local options */ + if (_path.empty() || _ident.empty() || !_cacheSize) + _options &= ~DUMP_TEXCACHE; + +#if DUMP_CACHE + if (_options & DUMP_TEXCACHE) { + /* find it on disk */ + std::wstring filename = _ident + L"_MEMORYCACHE.dat"; + CPath cachepath(stdstr().FromUTF16(_path.c_str()),""); + cachepath.AppendDirectory("cache"); + int config = _options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESS_TEX|COMPRESSION_MASK|FORCE16BPP_TEX|GZ_TEXCACHE); + + TxCache::load(stdstr(cachepath).ToUTF16().c_str(), filename.c_str(), config); + } +#endif +} + +boolean +TxTexCache::add(uint64 checksum, GHQTexInfo *info) +{ + if (_cacheSize <= 0) return 0; + + return TxCache::add(checksum, info); +} diff --git a/Source/GlideHQ/TxTexCache.h b/Source/GlideHQ/TxTexCache.h new file mode 100644 index 000000000..b80209533 --- /dev/null +++ b/Source/GlideHQ/TxTexCache.h @@ -0,0 +1,39 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. 
+ * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __TXTEXCACHE_H__ +#define __TXTEXCACHE_H__ + +#include "TxCache.h" + +class TxTexCache : public TxCache +{ +public: + ~TxTexCache(); + TxTexCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident, + dispInfoFuncExt callback); + boolean add(uint64 checksum, /* checksum hi:palette low:texture */ + GHQTexInfo *info); +}; + +#endif /* __TXTEXCACHE_H__ */ diff --git a/Source/GlideHQ/TxUtil.cpp b/Source/GlideHQ/TxUtil.cpp new file mode 100644 index 000000000..4e72cd8da --- /dev/null +++ b/Source/GlideHQ/TxUtil.cpp @@ -0,0 +1,1006 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "TxUtil.h" +#include "TxDbg.h" +#include +#include + +/* + * External libraries + ******************************************************************************/ +TxLoadLib::TxLoadLib() +{ +#ifdef DXTN_DLL + if (!_dxtnlib) + _dxtnlib = LoadLibrary("dxtn"); + + if (_dxtnlib) { + if (!_tx_compress_dxtn) + _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn"); + + if (!_tx_compress_fxt1) + _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode"); + } +#else + _tx_compress_dxtn = tx_compress_dxtn; + _tx_compress_fxt1 = fxt1_encode; + +#endif +} + +TxLoadLib::~TxLoadLib() +{ +#ifdef DXTN_DLL + /* free dynamic library */ + if (_dxtnlib) + FreeLibrary(_dxtnlib); +#endif + +} + +fxtCompressTexFuncExt +TxLoadLib::getfxtCompressTexFuncExt() +{ + return _tx_compress_fxt1; +} + +dxtCompressTexFuncExt +TxLoadLib::getdxtCompressTexFuncExt() +{ + return _tx_compress_dxtn; +} + + +/* + * Utilities + ******************************************************************************/ +uint32 +TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format) +{ + int dataSize = sizeofTx(width, height, format); + + /* for now we use adler32 if something else is better + * we can simply swtich later + */ + /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */ + + /* zlib crc32 */ + return (dataSize ? 
crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0); +} + +int +TxUtil::sizeofTx(int width, int height, uint16 format) +{ + int dataSize = 0; + + /* a lookup table for the shifts would be better */ + switch (format) { + case GR_TEXFMT_ARGB_CMP_FXT1: + dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1; + break; + case GR_TEXFMT_ARGB_CMP_DXT1: + dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1; + break; + case GR_TEXFMT_ARGB_CMP_DXT3: + case GR_TEXFMT_ARGB_CMP_DXT5: + dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3); + break; + case GR_TEXFMT_ALPHA_INTENSITY_44: + case GR_TEXFMT_ALPHA_8: + case GR_TEXFMT_INTENSITY_8: + case GR_TEXFMT_P_8: + dataSize = width * height; + break; + case GR_TEXFMT_ARGB_4444: + case GR_TEXFMT_ARGB_1555: + case GR_TEXFMT_RGB_565: + case GR_TEXFMT_ALPHA_INTENSITY_88: + dataSize = (width * height) << 1; + break; + case GR_TEXFMT_ARGB_8888: + dataSize = (width * height) << 2; + break; + default: + /* unsupported format */ + DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format); + ; + } + + return dataSize; +} + +#if 0 /* unused */ +uint32 +TxUtil::chkAlpha(uint32* src, int width, int height) +{ + /* NOTE: _src must be ARGB8888 + * return values + * 0x00000000: 8bit alpha + * 0x00000001: 1bit alpha + * 0xff000001: no alpha + */ + + int _size = width * height; + uint32 alpha = 0; + + __asm { + mov esi, dword ptr [src]; + mov ecx, dword ptr [_size]; + mov ebx, 0xff000000; + + tc1_loop: + mov eax, dword ptr [esi]; + add esi, 4; + + and eax, 0xff000000; + jz alpha1bit; + cmp eax, 0xff000000; + je alpha1bit; + jmp done; + + alpha1bit: + and ebx, eax; + dec ecx; + jnz tc1_loop; + + or ebx, 0x00000001; + mov dword ptr [alpha], ebx; + + done: + } + + return alpha; +} +#endif + +uint32 +TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride) +{ + /* Rice CRC32 for now. We can switch this to Jabo MD5 or + * any other custom checksum. + * TODO: use *_HIRESTEXTURE option. 
*/ + + if (!src) return 0; + + return RiceCRC32(src, width, height, size, rowStride); +} + +uint64 +TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette) +{ + /* Rice CRC32 for now. We can switch this to Jabo MD5 or + * any other custom checksum. + * TODO: use *_HIRESTEXTURE option. */ + /* Returned value is 64bits: hi=palette crc32 low=texture crc32 */ + + if (!src) return 0; + + uint64 crc64Ret = 0; + + if (palette) { + uint32 crc32 = 0, cimax = 0; + switch (size & 0xff) { + case 1: + if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) { + crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512); + crc64Ret <<= 32; + crc64Ret |= (uint64)crc32; + } + break; + case 0: + if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) { + crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32); + crc64Ret <<= 32; + crc64Ret |= (uint64)crc32; + } + } + } + if (!crc64Ret) { + crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride); + } + + return crc64Ret; +} + +/* +** Computes Adler32 checksum for a stream of data. +** +** From the specification found in RFC 1950: (ZLIB Compressed Data Format +** Specification version 3.3) +** +** ADLER32 (Adler-32 checksum) This contains a checksum value of the +** uncompressed data (excluding any dictionary data) computed according to +** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement +** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard. +** +** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of +** all bytes, s2 is the sum of all s1 values. Both sums are done modulo +** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored +** as s2*65536 + s1 in most-significant-byte first (network) order. +** +** 8.2. 
The Adler-32 algorithm +** +** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still +** provides an extremely low probability of undetected errors. +** +** The modulo on unsigned long accumulators can be delayed for 5552 bytes, +** so the modulo operation time is negligible. If the bytes are a, b, c, +** the second sum is 3a + 2b + c + 3, and so is position and order sensitive, +** unlike the first sum, which is just a checksum. That 65521 is prime is +** important to avoid a possible large class of two-byte errors that leave +** the check unchanged. (The Fletcher checksum uses 255, which is not prime +** and which also makes the Fletcher check insensitive to single byte +** changes 0 <-> 255.) +** +** The sum s1 is initialized to 1 instead of zero to make the length of +** the sequence part of s2, so that the length does not have to be checked +** separately. (Any sequence of zeroes has a Fletcher checksum of zero.) +*/ + +uint32 +TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32) +{ +#if 1 + /* zlib adler32 */ + return adler32(dwAdler32, data, Len); +#else + register uint32 s1 = dwAdler32 & 0xFFFF; + register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF; + int k; + + while (Len > 0) { + /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + k = (Len < 5552 ? Len : 5552); + Len -= k; + while (k--) { + s1 += *data++; + s2 += s1; + } + /* 65521 is the largest prime smaller than 65536 */ + s1 %= 65521; + s2 %= 65521; + } + + return (s2 << 16) | s1; +#endif +} + +uint32 +TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride) +{ + int i; + uint32 ret = 1; + uint32 width_in_bytes = width * size; + + for (i = 0; i < height; i++) { + ret = Adler32(src, width_in_bytes, ret); + src += rowStride; + } + + return ret; +} + +/* Rice CRC32 for hires texture packs */ +/* NOTE: The following is used in Glide64 to calculate the CRC32 + * for Rice hires texture packs. 
+ * + * BYTE* addr = (BYTE*)(gfx.RDRAM + + * rdp.addr[rdp.tiles[tile].t_mem] + + * (rdp.tiles[tile].ul_t * bpl) + + * (((rdp.tiles[tile].ul_s<>1)); + * RiceCRC32(addr, + * rdp.tiles[tile].width, + * rdp.tiles[tile].height, + * (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size), + * bpl); + */ +uint32 +TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride) +{ + /* NOTE: bytes_per_width must be equal or larger than 4 */ + + uint32 crc32Ret = 0; + const uint32 bytes_per_width = ((width << size) + 1) >> 1; + + /*if (bytes_per_width < 4) return 0;*/ + + try { +#ifdef WIN32 + __asm { + push ebx; + push esi; + push edi; + + mov ecx, dword ptr [src]; + mov eax, dword ptr [height]; + mov edx, 0; + dec eax; + + loop2: + mov ebx, dword ptr [bytes_per_width]; + sub ebx, 4; + + loop1: + mov esi, dword ptr [ecx+ebx]; + xor esi, ebx; + rol edx, 4; + add edx, esi; + sub ebx, 4; + jge loop1; + + xor esi, eax; + add edx, esi; + add ecx, dword ptr [rowStride]; + dec eax; + jge loop2; + + mov dword ptr [crc32Ret], edx; + + pop edi; + pop esi; + pop ebx; + } +#else + asm volatile( + "pushl %%ebx \n" + "pushl %%esi \n" + "pushl %%edi \n" + + "movl %0, %%ecx \n" + "movl %1, %%eax \n" + "movl $0, %%edx \n" + "decl %%eax \n" + + "0: \n" + "movl %2, %%ebx \n" + "subl $4, %%ebx \n" + + "1: \n" + "movl (%%ecx,%%ebx), %%esi \n" + "xorl %%ebx, %%esi \n" + "roll $4, %%edx \n" + "addl %%esi, %%edx \n" + "subl $4, %%ebx \n" + "jge 1b \n" + + "xorl %%eax, %%esi \n" + "addl %%esi, %%edx \n" + "addl %3, %%ecx \n" + "decl %%eax \n" + "jge 0b \n" + + "movl %%edx, %4 \n" + + "popl %%edi \n" + "popl %%esi \n" + "popl %%ebx \n" + : + : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret) + : "memory", "cc" + ); +#endif + } catch(...) 
{ + DBG_INFO(80, L"Error: RiceCRC32 exception!\n"); + } + + return crc32Ret; +} + +boolean +TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride, + uint32* crc32, uint32* cimax) +{ + /* NOTE: bytes_per_width must be equal or larger than 4 */ + + uint32 crc32Ret = 0; + uint32 cimaxRet = 0; + const uint32 bytes_per_width = ((width << size) + 1) >> 1; + + /*if (bytes_per_width < 4) return 0;*/ + + /* 4bit CI */ + try { +#ifdef WIN32 + __asm { + push ebx; + push esi; + push edi; + + mov ecx, dword ptr [src]; + mov eax, dword ptr [height]; + mov edx, 0; + mov edi, 0; + dec eax; + + loop2: + mov ebx, dword ptr [bytes_per_width]; + sub ebx, 4; + + loop1: + mov esi, dword ptr [ecx+ebx]; + + cmp edi, 0x0000000f; + je findmax0; + + push ecx; + mov ecx, esi; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax8; + mov edi, ecx; + + findmax8: + mov ecx, esi; + shr ecx, 4; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax7; + mov edi, ecx; + + findmax7: + mov ecx, esi; + shr ecx, 8; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax6; + mov edi, ecx; + + findmax6: + mov ecx, esi; + shr ecx, 12; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax5; + mov edi, ecx; + + findmax5: + mov ecx, esi; + shr ecx, 16; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax4; + mov edi, ecx; + + findmax4: + mov ecx, esi; + shr ecx, 20; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax3; + mov edi, ecx; + + findmax3: + mov ecx, esi; + shr ecx, 24; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax2; + mov edi, ecx; + + findmax2: + mov ecx, esi; + shr ecx, 28; + and ecx, 0x0000000f; + cmp ecx, edi; + jb findmax1; + mov edi, ecx; + + findmax1: + pop ecx; + + findmax0: + xor esi, ebx; + rol edx, 4; + add edx, esi; + sub ebx, 4; + jge loop1; + + xor esi, eax; + add edx, esi; + add ecx, dword ptr [rowStride]; + dec eax; + jge loop2; + + mov dword ptr [crc32Ret], edx; + mov dword ptr [cimaxRet], edi; + + pop edi; + pop esi; + pop ebx; + } +#else + asm volatile( 
+ "pushl %%ebx \n" + "pushl %%esi \n" + "pushl %%edi \n" + + "movl %0, %%ecx \n" + "movl %1, %%eax \n" + "movl $0, %%edx \n" + "movl $0, %%edi \n" + "decl %%eax \n" + + "0: \n" + "movl %2, %%ebx \n" + "subl $4, %%ebx \n" + + "1: \n" + "movl (%%ecx,%%ebx), %%esi \n" + + "cmpl $0x0000000f, %%edi \n" + "je 10f \n" + + "pushl %%ecx \n" + "movl %%esi, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 2f \n" + "movl %%ecx, %%edi \n" + + "2: \n" + "movl %%esi, %%ecx \n" + "shrl $4, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 3f \n" + "movl %%ecx, %%edi \n" + + "3: \n" + "movl %%esi, %%ecx \n" + "shrl $8, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 4f \n" + "movl %%ecx, %%edi \n" + + "4: \n" + "movl %%esi, %%ecx \n" + "shrl $12, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 5f \n" + "movl %%ecx, %%edi \n" + + "5: \n" + "movl %%esi, %%ecx \n" + "shrl $16, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 6f \n" + "movl %%ecx, %%edi \n" + + "6: \n" + "movl %%esi, %%ecx \n" + "shrl $20, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 7f \n" + "movl %%ecx, %%edi \n" + + "7: \n" + "movl %%esi, %%ecx \n" + "shrl $24, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 8f \n" + "movl %%ecx, %%edi \n" + + "8: \n" + "movl %%esi, %%ecx \n" + "shrl $28, %%ecx \n" + "andl $0x0000000f, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 9f \n" + "movl %%ecx, %%edi \n" + + "9: \n" + "popl %%ecx \n" + + "10: \n" + "xorl %%ebx, %%esi \n" + "roll $4, %%edx \n" + "addl %%esi, %%edx \n" + "subl $4, %%ebx \n" + "jge 1b \n" + + "xorl %%eax, %%esi \n" + "addl %%esi, %%edx \n" + "addl %3, %%ecx \n" + "decl %%eax \n" + "jge 0b \n" + + "movl %%edx, %4 \n" + "movl %%edi, %5 \n" + + "popl %%edi \n" + "popl %%esi \n" + "popl %%ebx \n" + : + : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet) + : "memory", 
"cc" + ); +#endif + } catch(...) { + DBG_INFO(80, L"Error: RiceCRC32 exception!\n"); + } + + *crc32 = crc32Ret; + *cimax = cimaxRet; + + return 1; +} + +boolean +TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride, + uint32* crc32, uint32* cimax) +{ + /* NOTE: bytes_per_width must be equal or larger than 4 */ + + uint32 crc32Ret = 0; + uint32 cimaxRet = 0; + const uint32 bytes_per_width = ((width << size) + 1) >> 1; + + /*if (bytes_per_width < 4) return 0;*/ + + /* 8bit CI */ + try { +#ifdef WIN32 + __asm { + push ebx; + push esi; + push edi; + + mov ecx, dword ptr [src]; + mov eax, dword ptr [height]; + mov edx, 0; + mov edi, 0; + dec eax; + + loop2: + mov ebx, dword ptr [bytes_per_width]; + sub ebx, 4; + + loop1: + mov esi, dword ptr [ecx+ebx]; + + cmp edi, 0x000000ff; + je findmax0; + + push ecx; + mov ecx, esi; + and ecx, 0x000000ff; + cmp ecx, edi; + jb findmax4; + mov edi, ecx; + + findmax4: + mov ecx, esi; + shr ecx, 8; + and ecx, 0x000000ff; + cmp ecx, edi; + jb findmax3; + mov edi, ecx; + + findmax3: + mov ecx, esi; + shr ecx, 16; + and ecx, 0x000000ff; + cmp ecx, edi; + jb findmax2; + mov edi, ecx; + + findmax2: + mov ecx, esi; + shr ecx, 24; + and ecx, 0x000000ff; + cmp ecx, edi; + jb findmax1; + mov edi, ecx; + + findmax1: + pop ecx; + + findmax0: + xor esi, ebx; + rol edx, 4; + add edx, esi; + sub ebx, 4; + jge loop1; + + xor esi, eax; + add edx, esi; + add ecx, dword ptr [rowStride]; + dec eax; + jge loop2; + + mov dword ptr [crc32Ret], edx; + mov dword ptr [cimaxRet], edi; + + pop edi; + pop esi; + pop ebx; + } +#else + asm volatile( + "pushl %%ebx \n" + "pushl %%esi \n" + "pushl %%edi \n" + + "movl %0, %%ecx \n" + "movl %1, %%eax \n" + "movl $0, %%edx \n" + "movl $0, %%edi \n" + "decl %%eax \n" + + "0: \n" + "movl %2, %%ebx \n" + "subl $4, %%ebx \n" + + "1: \n" + "movl (%%ecx,%%ebx), %%esi \n" + + "cmpl $0x000000ff, %%edi \n" + "je 6f \n" + + "pushl %%ecx \n" + "movl %%esi, %%ecx \n" + "andl $0x000000ff, %%ecx \n" 
+ "cmpl %%edi, %%ecx \n" + "jb 2f \n" + "movl %%ecx, %%edi \n" + + "2: \n" + "movl %%esi, %%ecx \n" + "shrl $8, %%ecx \n" + "andl $0x000000ff, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 3f \n" + "movl %%ecx, %%edi \n" + + "3: \n" + "movl %%esi, %%ecx \n" + "shrl $16, %%ecx \n" + "andl $0x000000ff, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 4f \n" + "movl %%ecx, %%edi \n" + + "4: \n" + "movl %%esi, %%ecx \n" + "shrl $24, %%ecx \n" + "andl $0x000000ff, %%ecx \n" + "cmpl %%edi, %%ecx \n" + "jb 5f \n" + "movl %%ecx, %%edi \n" + + "5: \n" + "popl %%ecx \n" + + "6: \n" + "xorl %%ebx, %%esi \n" + "roll $4, %%edx \n" + "addl %%esi, %%edx \n" + "subl $4, %%ebx \n" + "jge 1b \n" + + "xorl %%eax, %%esi \n" + "addl %%esi, %%edx \n" + "addl %3, %%ecx \n" + "decl %%eax \n" + "jge 0b \n" + + "movl %%edx, %4 \n" + "movl %%edi, %5 \n" + + "popl %%edi \n" + "popl %%esi \n" + "popl %%ebx \n" + : + : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet) + : "memory", "cc" + ); +#endif + } catch(...) { + DBG_INFO(80, L"Error: RiceCRC32 exception!\n"); + } + + *crc32 = crc32Ret; + *cimax = cimaxRet; + + return 1; +} + +int +TxUtil::log2(int num) +{ + int i = 0; + +#if 1 + if (!num) return 0; +#ifdef WIN32 + __asm { + mov eax, dword ptr [num]; + bsr eax, eax; + mov dword ptr [i], eax; + } +#else + asm volatile( + "movl %0, %%eax \n" + "bsrl %%eax, %%eax \n" + "movl %%eax, %1 \n" + : + : "m"(num), "m"(i) + : "memory", "cc" + ); +#endif +#else + switch (num) { + case 1: return 0; + case 2: return 1; + case 4: return 2; + case 8: return 3; + case 16: return 4; + case 32: return 5; + case 64: return 6; + case 128: return 7; + case 256: return 8; + case 512: return 9; + case 1024: return 10; + case 2048: return 11; + } +#endif + + return i; +} + +int +TxUtil::grLodLog2(int w, int h) +{ + return (w >= h ? log2(w) : log2(h)); +} + +int +TxUtil::grAspectRatioLog2(int w, int h) +{ + return (w >= h ? 
log2(w/h) : -log2(h/w)); +} + +int +TxUtil::getNumberofProcessors() +{ + int numcore = 1; + + /* number of logical processors per physical processor */ + try { +#ifdef WIN32 +#if 1 + /* use win32 api */ + SYSTEM_INFO siSysInfo; + ZeroMemory(&siSysInfo, sizeof(SYSTEM_INFO)); + GetSystemInfo(&siSysInfo); + numcore = siSysInfo.dwNumberOfProcessors; +#else + __asm { + push ebx; + + mov eax, 1; + cpuid; + test edx, 0x10000000; /* check HTT */ + jz uniproc; + and ebx, 0x00ff0000; /* mask logical core counter bit */ + shr ebx, 16; + mov dword ptr [numcore], ebx; + uniproc: + + pop ebx; + } +#endif +#else + asm volatile( + "pushl %%ebx \n" + + "movl $1, %%eax \n" + "cpuid \n" + "testl $0x10000000, %%edx \n" + "jz 0f \n" + "andl $0x00ff0000, %%ebx \n" + "shrl $16, %%ebx \n" + "movl %%ebx, %0 \n" + "0: \n" + + "popl %%ebx \n" + : + : "m"(numcore) + : "memory", "cc" + ); +#endif + } catch(...) { + DBG_INFO(80, L"Error: number of processor detection failed!\n"); + } + + if (numcore > MAX_NUMCORE) numcore = MAX_NUMCORE; + + DBG_INFO(80, L"Number of processors : %d\n", numcore); + + return numcore; +} + + +/* + * Memory buffers for texture manipulations + ******************************************************************************/ +TxMemBuf::TxMemBuf() +{ + int i; + for (i = 0; i < 2; i++) { + _tex[i] = NULL; + _size[i] = 0; + } +} + +TxMemBuf::~TxMemBuf() +{ + shutdown(); +} + +boolean +TxMemBuf::init(int maxwidth, int maxheight) +{ + int i; + for (i = 0; i < 2; i++) { + if (!_tex[i]) { + _tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4); + _size[i] = maxwidth * maxheight * 4; + } + + if (!_tex[i]) { + shutdown(); + return 0; + } + } + return 1; +} + +void +TxMemBuf::shutdown() +{ + int i; + for (i = 0; i < 2; i++) { + if (_tex[i]) free(_tex[i]); + _tex[i] = NULL; + _size[i] = 0; + } +} + +uint8* +TxMemBuf::get(unsigned int num) +{ + return ((num < 2) ? _tex[num] : NULL); +} + +uint32 +TxMemBuf::size_of(unsigned int num) +{ + return ((num < 2) ? 
_size[num] : 0); +} diff --git a/Source/GlideHQ/TxUtil.h b/Source/GlideHQ/TxUtil.h new file mode 100644 index 000000000..b89f660df --- /dev/null +++ b/Source/GlideHQ/TxUtil.h @@ -0,0 +1,121 @@ +/* + * Texture Filtering + * Version: 1.0 + * + * Copyright (C) 2007 Hiroshi Morii All Rights Reserved. + * Email koolsmoky(at)users.sourceforge.net + * Web http://www.3dfxzone.it/koolsmoky + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __TXUTIL_H__ +#define __TXUTIL_H__ + +/* maximum number of CPU cores allowed */ +#define MAX_NUMCORE 8 + +#include "TxInternal.h" +#include + +#ifndef DXTN_DLL +#ifdef __cplusplus +extern "C"{ +#endif +void tx_compress_dxtn(int srccomps, int width, int height, + const void *source, int destformat, void *dest, + int destRowStride); + +int fxt1_encode(int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); +#ifdef __cplusplus +} +#endif +#endif /* DXTN_DLL */ + +typedef void (*dxtCompressTexFuncExt)(int srccomps, int width, + int height, const void *srcPixData, + int destformat, void *dest, + int dstRowStride); + +typedef int (*fxtCompressTexFuncExt)(int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); + +class TxLoadLib +{ +private: +#ifdef DXTN_DLL + HMODULE _dxtnlib; +#endif + fxtCompressTexFuncExt _tx_compress_fxt1; + dxtCompressTexFuncExt _tx_compress_dxtn; + TxLoadLib(); +public: + static TxLoadLib* getInstance() { + static TxLoadLib txLoadLib; + return &txLoadLib; + } + ~TxLoadLib(); + fxtCompressTexFuncExt getfxtCompressTexFuncExt(); + dxtCompressTexFuncExt getdxtCompressTexFuncExt(); +}; + +class TxUtil +{ +private: + uint32 Adler32(const uint8* data, int Len, uint32 Adler); + uint32 Adler32(const uint8* src, int width, int height, int size, int rowStride); + uint32 RiceCRC32(const uint8* src, int width, int height, int size, int rowStride); + boolean RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride, + uint32* crc32, uint32* cimax); + boolean RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride, + uint32* crc32, uint32* cimax); + int log2(int num); +public: + TxUtil() { } + ~TxUtil() { } + int sizeofTx(int width, int height, uint16 format); + uint32 checksumTx(uint8 *data, int width, int height, uint16 format); +#if 0 /* unused */ + uint32 chkAlpha(uint32* src, int width, int 
height); +#endif + uint32 checksum(uint8 *src, int width, int height, int size, int rowStride); + uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); + int grLodLog2(int w, int h); + int grAspectRatioLog2(int w, int h); + int getNumberofProcessors(); +}; + +class TxMemBuf +{ +private: + uint8 *_tex[2]; + uint32 _size[2]; + TxMemBuf(); +public: + static TxMemBuf* getInstance() { + static TxMemBuf txMemBuf; + return &txMemBuf; + } + ~TxMemBuf(); + boolean init(int maxwidth, int maxheight); + void shutdown(void); + uint8 *get(unsigned int num); + uint32 size_of(unsigned int num); +}; + +#endif /* __TXUTIL_H__ */ diff --git a/Source/GlideHQ/bldno.h b/Source/GlideHQ/bldno.h new file mode 100644 index 000000000..e69de29bb diff --git a/Source/GlideHQ/tc-1.1+/dxtn.c b/Source/GlideHQ/tc-1.1+/dxtn.c new file mode 100644 index 000000000..e2d335ae0 --- /dev/null +++ b/Source/GlideHQ/tc-1.1+/dxtn.c @@ -0,0 +1,884 @@ +/* + * DXTn codec + * Version: 1.1 + * + * Copyright (C) 2004 Daniel Borca All Rights Reserved. + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Copyright (C) 2007 Hiroshi Morii + * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and + * YUV conversions to determine representative colors. 
+ */ + + +#include +#include +#include + +#include + +#include "types.h" +#include "internal.h" +#include "dxtn.h" + + +/***************************************************************************\ + * DXTn encoder + * + * The encoder was built by reversing the decoder, + * and is vaguely based on FXT1 codec. Note that this code + * is merely a proof of concept, since it is highly UNoptimized! +\***************************************************************************/ + + +#define MAX_COMP 4 /* ever needed maximum number of components in texel */ +#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */ +#define N_TEXELS 16 /* number of texels in a block (always 16) */ +#define COLOR565(v) (word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3)) + + +static const int dxtn_color_tlat[2][4] = { + { 0, 2, 3, 1 }, + { 0, 2, 1, 3 } +}; + +static const int dxtn_alpha_tlat[2][8] = { + { 0, 2, 3, 4, 5, 6, 7, 1 }, + { 0, 2, 3, 4, 5, 1, 6, 7 } +}; + + +static void +dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps) +{ + float b, iv[MAX_COMP]; /* interpolation vector */ + + dword hi; /* high doubleword */ + int color0, color1; + int n_vect; + const int n_comp = 3; + int black = 0; + +#ifndef YUV + int minSum = 2000; /* big enough */ +#else + int minSum = 2000000; +#endif + int maxSum = -1; /* small enough */ + int minCol = 0; /* phoudoin: silent compiler! */ + int maxCol = 0; /* phoudoin: silent compiler! 
*/ + + byte input[N_TEXELS][MAX_COMP]; + int i, k, l; + + /* make the whole block opaque */ + /* we will NEVER reference ACOMP of any pixel */ + + /* 4 texels each line */ +#ifndef ARGB + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + for (i = 0; i < comps; i++) { + input[k + l * 4][i] = *lines[l]++; + } + } + } +#else + /* H.Morii - support for ARGB inputs */ + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + input[k + l * 4][2] = *lines[l]++; + input[k + l * 4][1] = *lines[l]++; + input[k + l * 4][0] = *lines[l]++; + if (comps == 4) input[k + l * 4][3] = *lines[l]++; + } + } +#endif + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). + */ + for (k = 0; k < N_TEXELS; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + /* RGB to YUV conversion according to CCIR 601 specs + * Y = 0.299R+0.587G+0.114B + * U = 0.713(R - Y) = 0.500R-0.419G-0.081B + * V = 0.564(B - Y) = -0.169R-0.331G+0.500B + */ + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minCol = k; + } + if (maxSum < sum) { + maxSum = sum; + maxCol = k; + } + if (sum == 0) { + black = 1; + } + } + + color0 = COLOR565(input[minCol]); + color1 = COLOR565(input[maxCol]); + + if (color0 == color1) { + /* we'll use 3-vector */ + cc[0] = color0 | (color1 << 16); + hi = black ? -1 : 0; + } else { + if (black && ((color0 == 0) || (color1 == 0))) { + /* we still can use 4-vector */ + black = 0; + } + + if (black ^ (color0 <= color1)) { + int aux; + aux = color0; + color0 = color1; + color1 = aux; + aux = minCol; + minCol = maxCol; + maxCol = aux; + } + n_vect = (color0 <= color1) ? 
2 : 3; + + MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); + + /* add in texels */ + cc[0] = color0 | (color1 << 16); + hi = 0; + for (k = N_TEXELS - 1; k >= 0; k--) { + int texel = 3; + int sum = 0; + if (black) { + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } + } + if (!black || sum) { + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + texel = dxtn_color_tlat[black][texel]; + } + /* add in texel */ + hi <<= 2; + hi |= texel; + } + } + cc[1] = hi; +} + + +static void +dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps) +{ + float b, iv[MAX_COMP]; /* interpolation vector */ + + dword hi; /* high doubleword */ + int color0, color1; + int n_vect; + const int n_comp = 3; + int transparent = 0; + +#ifndef YUV + int minSum = 2000; /* big enough */ +#else + int minSum = 2000000; +#endif + int maxSum = -1; /* small enough */ + int minCol = 0; /* phoudoin: silent compiler! */ + int maxCol = 0; /* phoudoin: silent compiler! */ + + byte input[N_TEXELS][MAX_COMP]; + int i, k, l; + + if (comps == 3) { + /* make the whole block opaque */ + memset(input, -1, sizeof(input)); + } + + /* 4 texels each line */ +#ifndef ARGB + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + for (i = 0; i < comps; i++) { + input[k + l * 4][i] = *lines[l]++; + } + } + } +#else + /* H.Morii - support for ARGB inputs */ + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + input[k + l * 4][2] = *lines[l]++; + input[k + l * 4][1] = *lines[l]++; + input[k + l * 4][0] = *lines[l]++; + if (comps == 4) input[k + l * 4][3] = *lines[l]++; + } + } +#endif + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). 
+ */ + for (k = 0; k < N_TEXELS; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minCol = k; + } + if (maxSum < sum) { + maxSum = sum; + maxCol = k; + } + if (input[k][ACOMP] < 128) { + transparent = 1; + } + } + + color0 = COLOR565(input[minCol]); + color1 = COLOR565(input[maxCol]); + + if (color0 == color1) { + /* we'll use 3-vector */ + cc[0] = color0 | (color1 << 16); + hi = transparent ? -1 : 0; + } else { + if (transparent ^ (color0 <= color1)) { + int aux; + aux = color0; + color0 = color1; + color1 = aux; + aux = minCol; + minCol = maxCol; + maxCol = aux; + } + n_vect = (color0 <= color1) ? 2 : 3; + + MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); + + /* add in texels */ + cc[0] = color0 | (color1 << 16); + hi = 0; + for (k = N_TEXELS - 1; k >= 0; k--) { + int texel = 3; + if (input[k][ACOMP] >= 128) { + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + texel = dxtn_color_tlat[transparent][texel]; + } + /* add in texel */ + hi <<= 2; + hi |= texel; + } + } + cc[1] = hi; +} + + +static void +dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps) +{ + float b, iv[MAX_COMP]; /* interpolation vector */ + + dword lolo, lohi; /* low quadword: lo dword, hi dword */ + dword hihi; /* high quadword: high dword */ + int color0, color1; + const int n_vect = 3; + const int n_comp = 3; + +#ifndef YUV + int minSum = 2000; /* big enough */ +#else + int minSum = 2000000; +#endif + int maxSum = -1; /* small enough */ + int minCol = 0; /* phoudoin: silent compiler! */ + int maxCol = 0; /* phoudoin: silent compiler! 
*/ + + byte input[N_TEXELS][MAX_COMP]; + int i, k, l; + + if (comps == 3) { + /* make the whole block opaque */ + memset(input, -1, sizeof(input)); + } + + /* 4 texels each line */ +#ifndef ARGB + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + for (i = 0; i < comps; i++) { + input[k + l * 4][i] = *lines[l]++; + } + } + } +#else + /* H.Morii - support for ARGB inputs */ + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + input[k + l * 4][2] = *lines[l]++; + input[k + l * 4][1] = *lines[l]++; + input[k + l * 4][0] = *lines[l]++; + if (comps == 4) input[k + l * 4][3] = *lines[l]++; + } + } +#endif + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). + */ + for (k = 0; k < N_TEXELS; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minCol = k; + } + if (maxSum < sum) { + maxSum = sum; + maxCol = k; + } + } + + /* add in alphas */ + lolo = lohi = 0; + for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { + /* add in alpha */ + lohi <<= 4; + lohi |= input[k][ACOMP] >> 4; + } + cc[1] = lohi; + for (; k >= 0; k--) { + /* add in alpha */ + lolo <<= 4; + lolo |= input[k][ACOMP] >> 4; + } + cc[0] = lolo; + + color0 = COLOR565(input[minCol]); + color1 = COLOR565(input[maxCol]); + +#ifdef RADEON + /* H.Morii - Workaround for ATI Radeon + * According to the OpenGL EXT_texture_compression_s3tc specs, + * the encoding of the RGB components for DXT3 and DXT5 formats + * use the non-transparent encodings of DXT1 but treated as + * though color0 > color1, regardless of the actual values of + * color0 and color1. ATI Radeons however require the values to + * be color0 > color1. 
+ */ + if (color0 < color1) { + int aux; + aux = color0; + color0 = color1; + color1 = aux; + aux = minCol; + minCol = maxCol; + maxCol = aux; + } +#endif + + cc[2] = color0 | (color1 << 16); + + hihi = 0; + if (color0 != color1) { + MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); + + /* add in texels */ + for (k = N_TEXELS - 1; k >= 0; k--) { + int texel; + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + texel = dxtn_color_tlat[0][texel]; + /* add in texel */ + hihi <<= 2; + hihi |= texel; + } + } + cc[3] = hihi; +} + + +static void +dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps) +{ + float b, iv[MAX_COMP]; /* interpolation vector */ + + qword lo; /* low quadword */ + dword hihi; /* high quadword: high dword */ + int color0, color1; + const int n_vect = 3; + const int n_comp = 3; + +#ifndef YUV + int minSum = 2000; /* big enough */ +#else + int minSum = 2000000; +#endif + int maxSum = -1; /* small enough */ + int minCol = 0; /* phoudoin: silent compiler! */ + int maxCol = 0; /* phoudoin: silent compiler! */ + int alpha0 = 2000; /* big enough */ + int alpha1 = -1; /* small enough */ + int anyZero = 0, anyOne = 0; + int a_vect; + + byte input[N_TEXELS][MAX_COMP]; + int i, k, l; + + if (comps == 3) { + /* make the whole block opaque */ + memset(input, -1, sizeof(input)); + } + + /* 4 texels each line */ +#ifndef ARGB + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + for (i = 0; i < comps; i++) { + input[k + l * 4][i] = *lines[l]++; + } + } + } +#else + /* H.Morii - support for ARGB inputs */ + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + input[k + l * 4][2] = *lines[l]++; + input[k + l * 4][1] = *lines[l]++; + input[k + l * 4][0] = *lines[l]++; + if (comps == 4) input[k + l * 4][3] = *lines[l]++; + } + } +#endif + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. 
+ * There are probably better algorithms to use (histogram-based). + */ + for (k = 0; k < N_TEXELS; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minCol = k; + } + if (maxSum < sum) { + maxSum = sum; + maxCol = k; + } + if (alpha0 > input[k][ACOMP]) { + alpha0 = input[k][ACOMP]; + } + if (alpha1 < input[k][ACOMP]) { + alpha1 = input[k][ACOMP]; + } + if (input[k][ACOMP] == 0) { + anyZero = 1; + } + if (input[k][ACOMP] == 255) { + anyOne = 1; + } + } + + /* add in alphas */ + if (alpha0 == alpha1) { + /* we'll use 6-vector */ + cc[0] = alpha0 | (alpha1 << 8); + cc[1] = 0; + } else { + if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) { + /* we still might use 8-vector */ + anyZero = 0; + } + if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) { + /* we still might use 8-vector */ + anyOne = 0; + } + if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) { + int aux; + aux = alpha0; + alpha0 = alpha1; + alpha1 = aux; + } + a_vect = (alpha0 <= alpha1) ? 
5 : 7; + + /* compute interpolation vector */ + iv[ACOMP] = (float)a_vect / (alpha1 - alpha0); + b = -iv[ACOMP] * alpha0 + 0.5F; + + /* add in alphas */ + Q_MOV32(lo, 0); + for (k = N_TEXELS - 1; k >= 0; k--) { + int texel = -1; + if (anyZero | anyOne) { + if (input[k][ACOMP] == 0) { + texel = 6; + } else if (input[k][ACOMP] == 255) { + texel = 7; + } + } + /* interpolate alpha */ + if (texel == -1) { + float dot = input[k][ACOMP] * iv[ACOMP]; + texel = (int)(dot + b); +#if SAFECDOT + if (texel < 0) { + texel = 0; + } else if (texel > a_vect) { + texel = a_vect; + } +#endif + texel = dxtn_alpha_tlat[anyZero | anyOne][texel]; + } + /* add in texel */ + Q_SHL(lo, 3); + Q_OR32(lo, texel); + } + Q_SHL(lo, 16); + Q_OR32(lo, alpha0 | (alpha1 << 8)); + ((qword *)cc)[0] = lo; + } + + color0 = COLOR565(input[minCol]); + color1 = COLOR565(input[maxCol]); + +#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */ + if (color0 < color1) { + int aux; + aux = color0; + color0 = color1; + color1 = aux; + aux = minCol; + minCol = maxCol; + maxCol = aux; + } +#endif + + cc[2] = color0 | (color1 << 16); + + hihi = 0; + if (color0 != color1) { + MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]); + + /* add in texels */ + for (k = N_TEXELS - 1; k >= 0; k--) { + int texel; + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + texel = dxtn_color_tlat[0][texel]; + /* add in texel */ + hihi <<= 2; + hihi |= texel; + } + } + cc[3] = hihi; +} + + +#define ENCODER(dxtn, n) \ +int TAPIENTRY \ +dxtn##_encode (int width, int height, int comps, \ + const void *source, int srcRowStride, \ + void *dest, int destRowStride) \ +{ \ + int x, y; \ + const byte *data; \ + dword *encoded = (dword *)dest; \ + void *newSource = NULL; \ + \ + /* Replicate image if width is not M4 or height is not M4 */ \ + if ((width & 3) | (height & 3)) { \ + int newWidth = (width + 3) & ~3; \ + int newHeight = (height + 3) & ~3; \ + newSource = malloc(comps * newWidth * newHeight * 
sizeof(byte *));\ + _mesa_upscale_teximage2d(width, height, newWidth, newHeight, \ + comps, (const byte *)source, \ + srcRowStride, (byte *)newSource); \ + source = newSource; \ + width = newWidth; \ + height = newHeight; \ + srcRowStride = comps * newWidth; \ + } \ + \ + data = (const byte *)source; \ + destRowStride = (destRowStride - width * n) / 4; \ + for (y = 0; y < height; y += 4) { \ + unsigned int offs = 0 + (y + 0) * srcRowStride; \ + for (x = 0; x < width; x += 4) { \ + const byte *lines[4]; \ + lines[0] = &data[offs]; \ + lines[1] = lines[0] + srcRowStride; \ + lines[2] = lines[1] + srcRowStride; \ + lines[3] = lines[2] + srcRowStride; \ + offs += 4 * comps; \ + dxtn##_quantize(encoded, lines, comps); \ + /* 4x4 block */ \ + encoded += n; \ + } \ + encoded += destRowStride; \ + } \ + \ + if (newSource != NULL) { \ + free(newSource); \ + } \ + \ + return 0; \ +} + +ENCODER(dxt1_rgb, 2) +ENCODER(dxt1_rgba, 2) +ENCODER(dxt3_rgba, 4) +ENCODER(dxt5_rgba, 4) + + +/***************************************************************************\ + * DXTn decoder + * + * The decoder is based on GL_EXT_texture_compression_s3tc + * specification and serves as a concept for the encoder. 
+\***************************************************************************/ + + +/* lookup table for scaling 4 bit colors up to 8 bits */ +static const byte _rgb_scale_4[] = { + 0, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255 +}; + +/* lookup table for scaling 5 bit colors up to 8 bits */ +static const byte _rgb_scale_5[] = { + 0, 8, 16, 25, 33, 41, 49, 58, + 66, 74, 82, 90, 99, 107, 115, 123, + 132, 140, 148, 156, 165, 173, 181, 189, + 197, 206, 214, 222, 230, 239, 247, 255 +}; + +/* lookup table for scaling 6 bit colors up to 8 bits */ +static const byte _rgb_scale_6[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 45, 49, 53, 57, 61, + 65, 69, 73, 77, 81, 85, 89, 93, + 97, 101, 105, 109, 113, 117, 121, 125, + 130, 134, 138, 142, 146, 150, 154, 158, + 162, 166, 170, 174, 178, 182, 186, 190, + 194, 198, 202, 206, 210, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 255 +}; + + +#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31)) +#define UP4(c) _rgb_scale_4[(c) & 15] +#define UP5(c) _rgb_scale_5[(c) & 31] +#define UP6(c) _rgb_scale_6[(c) & 63] +#define ZERO_4UBV(v) *((dword *)(v)) = 0 + + +void TAPIENTRY +dxt1_rgb_decode_1 (const void *texture, int stride, + int i, int j, byte *rgba) +{ + const byte *src = (const byte *)texture + + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8; + const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3; + if (code == 0) { + rgba[RCOMP] = UP5(CC_SEL(src, 11)); + rgba[GCOMP] = UP6(CC_SEL(src, 5)); + rgba[BCOMP] = UP5(CC_SEL(src, 0)); + } else if (code == 1) { + rgba[RCOMP] = UP5(CC_SEL(src, 27)); + rgba[GCOMP] = UP6(CC_SEL(src, 21)); + rgba[BCOMP] = UP5(CC_SEL(src, 16)); + } else { + const word col0 = src[0] | (src[1] << 8); + const word col1 = src[2] | (src[3] << 8); + if (col0 > col1) { + if (code == 2) { + rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3; + rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3; + rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3; 
+ } else { + rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3; + rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3; + rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3; + } + } else { + if (code == 2) { + rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2; + rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2; + rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2; + } else { + ZERO_4UBV(rgba); + } + } + } + rgba[ACOMP] = 255; +} + + +void TAPIENTRY +dxt1_rgba_decode_1 (const void *texture, int stride, + int i, int j, byte *rgba) +{ + /* Same as rgb_dxt1 above, except alpha=0 if col0<=col1 and code=3. */ + const byte *src = (const byte *)texture + + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8; + const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3; + if (code == 0) { + rgba[RCOMP] = UP5(CC_SEL(src, 11)); + rgba[GCOMP] = UP6(CC_SEL(src, 5)); + rgba[BCOMP] = UP5(CC_SEL(src, 0)); + rgba[ACOMP] = 255; + } else if (code == 1) { + rgba[RCOMP] = UP5(CC_SEL(src, 27)); + rgba[GCOMP] = UP6(CC_SEL(src, 21)); + rgba[BCOMP] = UP5(CC_SEL(src, 16)); + rgba[ACOMP] = 255; + } else { + const word col0 = src[0] | (src[1] << 8); + const word col1 = src[2] | (src[3] << 8); + if (col0 > col1) { + if (code == 2) { + rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3; + rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3; + rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3; + } else { + rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3; + rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3; + rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3; + } + rgba[ACOMP] = 255; + } else { + if (code == 2) { + rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2; + rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2; + rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2; + rgba[ACOMP] = 255; + } else { + ZERO_4UBV(rgba); + } + } + } +} + + +void TAPIENTRY +dxt3_rgba_decode_1 (const void *texture, int stride, + int i, int j, byte *rgba) +{ + const byte *src = (const byte 
*)texture + + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16; + const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3; + const dword *cc = (const dword *)(src + 8); + if (code == 0) { + rgba[RCOMP] = UP5(CC_SEL(cc, 11)); + rgba[GCOMP] = UP6(CC_SEL(cc, 5)); + rgba[BCOMP] = UP5(CC_SEL(cc, 0)); + } else if (code == 1) { + rgba[RCOMP] = UP5(CC_SEL(cc, 27)); + rgba[GCOMP] = UP6(CC_SEL(cc, 21)); + rgba[BCOMP] = UP5(CC_SEL(cc, 16)); + } else if (code == 2) { + /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */ + rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3; + rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3; + rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3; + } else { + rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3; + rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3; + rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3; + } + rgba[ACOMP] = UP4(src[((j & 3) * 4 + (i & 3)) / 2] >> ((i & 1) * 4)); +} + + +void TAPIENTRY +dxt5_rgba_decode_1 (const void *texture, int stride, + int i, int j, byte *rgba) +{ + const byte *src = (const byte *)texture + + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16; + const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3; + const dword *cc = (const dword *)(src + 8); + const byte alpha0 = src[0]; + const byte alpha1 = src[1]; + const int alphaShift = (((j & 3) * 4) + (i & 3)) * 3 + 16; + const int acode = ((alphaShift == 31) + ? 
CC_SEL(src + 2, alphaShift - 16) + : CC_SEL(src, alphaShift)) & 0x7; + if (code == 0) { + rgba[RCOMP] = UP5(CC_SEL(cc, 11)); + rgba[GCOMP] = UP6(CC_SEL(cc, 5)); + rgba[BCOMP] = UP5(CC_SEL(cc, 0)); + } else if (code == 1) { + rgba[RCOMP] = UP5(CC_SEL(cc, 27)); + rgba[GCOMP] = UP6(CC_SEL(cc, 21)); + rgba[BCOMP] = UP5(CC_SEL(cc, 16)); + } else if (code == 2) { + /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */ + rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3; + rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3; + rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3; + } else { + rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3; + rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3; + rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3; + } + if (acode == 0) { + rgba[ACOMP] = alpha0; + } else if (acode == 1) { + rgba[ACOMP] = alpha1; + } else if (alpha0 > alpha1) { + rgba[ACOMP] = ((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7; + } else if (acode == 6) { + rgba[ACOMP] = 0; + } else if (acode == 7) { + rgba[ACOMP] = 255; + } else { + rgba[ACOMP] = ((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5; + } +} diff --git a/Source/GlideHQ/tc-1.1+/dxtn.h b/Source/GlideHQ/tc-1.1+/dxtn.h new file mode 100644 index 000000000..4078fd9f9 --- /dev/null +++ b/Source/GlideHQ/tc-1.1+/dxtn.h @@ -0,0 +1,62 @@ +/* + * DXTn codec + * Version: 1.1 + * + * Copyright (C) 2004 Daniel Borca All Rights Reserved. + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#ifndef DXTN_H_included +#define DXTN_H_included + +TAPI int TAPIENTRY +dxt1_rgb_encode (int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); + +TAPI int TAPIENTRY +dxt1_rgba_encode (int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); + +TAPI int TAPIENTRY +dxt3_rgba_encode (int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); + +TAPI int TAPIENTRY +dxt5_rgba_encode (int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); + +TAPI void TAPIENTRY +dxt1_rgb_decode_1 (const void *texture, int stride /* in pixels */, + int i, int j, byte *rgba); + +TAPI void TAPIENTRY +dxt1_rgba_decode_1 (const void *texture, int stride /* in pixels */, + int i, int j, byte *rgba); + +TAPI void TAPIENTRY +dxt3_rgba_decode_1 (const void *texture, int stride /* in pixels */, + int i, int j, byte *rgba); + +TAPI void TAPIENTRY +dxt5_rgba_decode_1 (const void *texture, int stride /* in pixels */, + int i, int j, byte *rgba); + +#endif diff --git a/Source/GlideHQ/tc-1.1+/fxt1.c b/Source/GlideHQ/tc-1.1+/fxt1.c new file mode 100644 index 000000000..1287ced53 --- /dev/null +++ b/Source/GlideHQ/tc-1.1+/fxt1.c @@ -0,0 +1,1459 @@ +/* + * FXT1 codec + * Version: 1.1 + * + * Copyright (C) 2004 Daniel Borca All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Copyright (C) 2007 Hiroshi Morii + * Added support for ARGB inputs. + */ + + +#include +#include + +#include "types.h" +#include "internal.h" +#include "fxt1.h" + + +/***************************************************************************\ + * FXT1 encoder + * + * The encoder was built by reversing the decoder, + * and is vaguely based on Texus2 by 3dfx. Note that this code + * is merely a proof of concept, since it is highly UNoptimized; + * moreover, it is sub-optimal due to initial conditions passed + * to Lloyd's algorithm (the interpolation modes are even worse). 
+\***************************************************************************/ + + +#define MAX_COMP 4 /* ever needed maximum number of components in texel */ +#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */ +#define N_TEXELS 32 /* number of texels in a block (always 32) */ +#define LL_N_REP 50 /* number of iterations in lloyd's vq */ +#define LL_RMS_D 10 /* fault tolerance (maximum delta) */ +#define LL_RMS_E 255 /* fault tolerance (maximum error) */ +#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */ +#define ISTBLACK(v) (*((dword *)(v)) == 0) +#define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC)) + + +static int +fxt1_bestcol (float vec[][MAX_COMP], int nv, + byte input[MAX_COMP], int nc) +{ + int i, j, best = -1; + float err = 1e9; /* big enough */ + + for (j = 0; j < nv; j++) { + float e = 0.0F; + for (i = 0; i < nc; i++) { + e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]); + } + if (e < err) { + err = e; + best = j; + } + } + + return best; +} + + +static int +fxt1_worst (float vec[MAX_COMP], + byte input[N_TEXELS][MAX_COMP], int nc, int n) +{ + int i, k, worst = -1; + float err = -1.0F; /* small enough */ + + for (k = 0; k < n; k++) { + float e = 0.0F; + for (i = 0; i < nc; i++) { + e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]); + } + if (e > err) { + err = e; + worst = k; + } + } + + return worst; +} + + +static int +fxt1_variance (double variance[MAX_COMP], + byte input[N_TEXELS][MAX_COMP], int nc, int n) +{ + int i, k, best = 0; + dword sx, sx2; + double var, maxvar = -1; /* small enough */ + double teenth = 1.0 / n; + + for (i = 0; i < nc; i++) { + sx = sx2 = 0; + for (k = 0; k < n; k++) { + int t = input[k][i]; + sx += t; + sx2 += t * t; + } + var = sx2 * teenth - sx * sx * teenth * teenth; + if (maxvar < var) { + maxvar = var; + best = i; + } + if (variance) { + variance[i] = var; + } + } + + return best; +} + + +static int +fxt1_choose (float vec[][MAX_COMP], int nv, + byte 
input[N_TEXELS][MAX_COMP], int nc, int n) +{ +#if 0 + /* Choose colors from a grid. + */ + int i, j; + + for (j = 0; j < nv; j++) { + int m = j * (n - 1) / (nv - 1); + for (i = 0; i < nc; i++) { + vec[j][i] = input[m][i]; + } + } +#else + /* Our solution here is to find the darkest and brightest colors in + * the 8x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). + */ + int i, j, k; +#ifndef YUV + int minSum = 2000; /* big enough */ +#else + int minSum = 2000000; +#endif + int maxSum = -1; /* small enough */ + int minCol = 0; /* phoudoin: silent compiler! */ + int maxCol = 0; /* phoudoin: silent compiler! */ + + struct { + int flag; + dword key; + int freq; + int idx; + } hist[N_TEXELS]; + int lenh = 0; + + memset(hist, 0, sizeof(hist)); + + for (k = 0; k < n; k++) { + int l; + dword key = 0; + int sum = 0; + for (i = 0; i < nc; i++) { + key <<= 8; + key |= input[k][i]; +#ifndef YUV + sum += input[k][i]; +#else + /* RGB to YUV conversion according to CCIR 601 specs + * Y = 0.299R+0.587G+0.114B + * U = 0.713(R - Y) = 0.500R-0.419G-0.081B + * V = 0.564(B - Y) = -0.169R-0.331G+0.500B + */ + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + } + for (l = 0; l < n; l++) { + if (!hist[l].flag) { + /* alloc new slot */ + hist[l].flag = !0; + hist[l].key = key; + hist[l].freq = 1; + hist[l].idx = k; + lenh = l + 1; + break; + } else if (hist[l].key == key) { + hist[l].freq++; + break; + } + } + if (minSum > sum) { + minSum = sum; + minCol = k; + } + if (maxSum < sum) { + maxSum = sum; + maxCol = k; + } + } + + if (lenh <= nv) { + for (j = 0; j < lenh; j++) { + for (i = 0; i < nc; i++) { + vec[j][i] = (float)input[hist[j].idx][i]; + } + } + for (; j < nv; j++) { + for (i = 0; i < nc; i++) { + vec[j][i] = vec[0][i]; + } + } + return 0; + } + + for (j = 0; j < nv; j++) { + for (i = 0; i < nc; i++) { + vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] 
+ (nv - 1) / 2) / (float)(nv - 1); + } + } +#endif + + return !0; +} + + +static int +fxt1_lloyd (float vec[][MAX_COMP], int nv, + byte input[N_TEXELS][MAX_COMP], int nc, int n) +{ + /* Use the generalized lloyd's algorithm for VQ: + * find 4 color vectors. + * + * for each sample color + * sort to nearest vector. + * + * replace each vector with the centroid of it's matching colors. + * + * repeat until RMS doesn't improve. + * + * if a color vector has no samples, or becomes the same as another + * vector, replace it with the color which is farthest from a sample. + * + * vec[][MAX_COMP] initial vectors and resulting colors + * nv number of resulting colors required + * input[N_TEXELS][MAX_COMP] input texels + * nc number of components in input / vec + * n number of input samples + */ + + int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */ + int cnt[MAX_VECT]; /* how many times a certain vector was chosen */ + float error, lasterror = 1e9; + + int i, j, k, rep; + + /* the quantizer */ + for (rep = 0; rep < LL_N_REP; rep++) { + /* reset sums & counters */ + for (j = 0; j < nv; j++) { + for (i = 0; i < nc; i++) { + sum[j][i] = 0; + } + cnt[j] = 0; + } + error = 0; + + /* scan whole block */ + for (k = 0; k < n; k++) { +#if 1 + int best = -1; + float err = 1e9; /* big enough */ + /* determine best vector */ + for (j = 0; j < nv; j++) { + float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) + + (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) + + (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]); + if (nc == 4) { + e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]); + } + if (e < err) { + err = e; + best = j; + } + } +#else + int best = fxt1_bestcol(vec, nv, input[k], nc, &err); +#endif + /* add in closest color */ + for (i = 0; i < nc; i++) { + sum[best][i] += input[k][i]; + } + /* mark this vector as used */ + cnt[best]++; + /* accumulate error */ + error += err; + } + + /* check RMS */ + if ((error < LL_RMS_E) || + 
((error < lasterror) && ((lasterror - error) < LL_RMS_D))) { + return !0; /* good match */ + } + lasterror = error; + + /* move each vector to the barycenter of its closest colors */ + for (j = 0; j < nv; j++) { + if (cnt[j]) { + float div = 1.0F / cnt[j]; + for (i = 0; i < nc; i++) { + vec[j][i] = div * sum[j][i]; + } + } else { + /* this vec has no samples or is identical with a previous vec */ + int worst = fxt1_worst(vec[j], input, nc, n); + for (i = 0; i < nc; i++) { + vec[j][i] = input[worst][i]; + } + } + } + } + + return 0; /* could not converge fast enough */ +} + + +static void +fxt1_quantize_CHROMA (dword *cc, + byte input[N_TEXELS][MAX_COMP]) +{ + const int n_vect = 4; /* 4 base vectors to find */ + const int n_comp = 3; /* 3 components: R, G, B */ + float vec[MAX_VECT][MAX_COMP]; + int i, j, k; + qword hi; /* high quadword */ + dword lohi, lolo; /* low quadword: hi dword, lo dword */ + + if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) { + fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS); + } + + Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */ + for (j = n_vect - 1; j >= 0; j--) { + for (i = 0; i < n_comp; i++) { + /* add in colors */ + Q_SHL(hi, 5); + Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); + } + } + ((qword *)cc)[1] = hi; + + lohi = lolo = 0; + /* right microtile */ + for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { + lohi <<= 2; + lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp); + } + /* left microtile */ + for (; k >= 0; k--) { + lolo <<= 2; + lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp); + } + cc[1] = lohi; + cc[0] = lolo; +} + + +static void +fxt1_quantize_ALPHA0 (dword *cc, + byte input[N_TEXELS][MAX_COMP], + byte reord[N_TEXELS][MAX_COMP], int n) +{ + const int n_vect = 3; /* 3 base vectors to find */ + const int n_comp = 4; /* 4 components: R, G, B, A */ + float vec[MAX_VECT][MAX_COMP]; + int i, j, k; + qword hi; /* high quadword */ + dword lohi, lolo; /* low quadword: hi dword, lo dword */ + + /* the last vector 
indicates zero */ + for (i = 0; i < n_comp; i++) { + vec[n_vect][i] = 0; + } + + /* the first n texels in reord are guaranteed to be non-zero */ + if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) { + fxt1_lloyd(vec, n_vect, reord, n_comp, n); + } + + Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */ + for (j = n_vect - 1; j >= 0; j--) { + /* add in alphas */ + Q_SHL(hi, 5); + Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F)); + } + for (j = n_vect - 1; j >= 0; j--) { + for (i = 0; i < n_comp - 1; i++) { + /* add in colors */ + Q_SHL(hi, 5); + Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); + } + } + ((qword *)cc)[1] = hi; + + lohi = lolo = 0; + /* right microtile */ + for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { + lohi <<= 2; + lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); + } + /* left microtile */ + for (; k >= 0; k--) { + lolo <<= 2; + lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); + } + cc[1] = lohi; + cc[0] = lolo; +} + + +static void +fxt1_quantize_ALPHA1 (dword *cc, + byte input[N_TEXELS][MAX_COMP]) +{ + const int n_vect = 3; /* highest vector number in each microtile */ + const int n_comp = 4; /* 4 components: R, G, B, A */ + float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */ + float b, iv[MAX_COMP]; /* interpolation vector */ + int i, j, k; + qword hi; /* high quadword */ + dword lohi, lolo; /* low quadword: hi dword, lo dword */ + + int minSum; + int maxSum; + int minColL = 0, maxColL = 0; + int minColR = 0, maxColR = 0; + int sumL = 0, sumR = 0; + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). 
+ */ +#ifndef YUV + minSum = 2000; /* big enough */ +#else + minSum = 2000000; +#endif + maxSum = -1; /* small enough */ + for (k = 0; k < N_TEXELS / 2; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minColL = k; + } + if (maxSum < sum) { + maxSum = sum; + maxColL = k; + } + sumL += sum; + } +#ifndef YUV + minSum = 2000; /* big enough */ +#else + minSum = 2000000; +#endif + maxSum = -1; /* small enough */ + for (; k < N_TEXELS; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minColR = k; + } + if (maxSum < sum) { + maxSum = sum; + maxColR = k; + } + sumR += sum; + } + + /* choose the common vector (yuck!) */ + { + int j1, j2; + int v1 = 0, v2 = 0; + float err = 1e9; /* big enough */ + float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ + for (i = 0; i < n_comp; i++) { + tv[0][i] = input[minColL][i]; + tv[1][i] = input[maxColL][i]; + tv[2][i] = input[minColR][i]; + tv[3][i] = input[maxColR][i]; + } + for (j1 = 0; j1 < 2; j1++) { + for (j2 = 2; j2 < 4; j2++) { + float e = 0.0F; + for (i = 0; i < n_comp; i++) { + e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]); + } + if (e < err) { + err = e; + v1 = j1; + v2 = j2; + } + } + } + for (i = 0; i < n_comp; i++) { + vec[0][i] = tv[1 - v1][i]; + vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR); + vec[2][i] = tv[5 - v2][i]; + } + } + + /* left microtile */ + cc[0] = 0; + if (minColL != maxColL) { + /* compute interpolation vector */ + MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); + + /* add in texels */ + lolo = 0; + for (k = N_TEXELS / 2 - 1; k >= 0; k--) { + int texel; + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, 
b, input[k]); + /* add in texel */ + lolo <<= 2; + lolo |= texel; + } + + cc[0] = lolo; + } + + /* right microtile */ + cc[1] = 0; + if (minColR != maxColR) { + /* compute interpolation vector */ + MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]); + + /* add in texels */ + lohi = 0; + for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { + int texel; + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + /* add in texel */ + lohi <<= 2; + lohi |= texel; + } + + cc[1] = lohi; + } + + Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */ + for (j = n_vect - 1; j >= 0; j--) { + /* add in alphas */ + Q_SHL(hi, 5); + Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F)); + } + for (j = n_vect - 1; j >= 0; j--) { + for (i = 0; i < n_comp - 1; i++) { + /* add in colors */ + Q_SHL(hi, 5); + Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); + } + } + ((qword *)cc)[1] = hi; +} + + +static void +fxt1_quantize_HI (dword *cc, + byte input[N_TEXELS][MAX_COMP], + byte reord[N_TEXELS][MAX_COMP], int n) +{ + const int n_vect = 6; /* highest vector number */ + const int n_comp = 3; /* 3 components: R, G, B */ + float b = 0.0F; /* phoudoin: silent compiler! */ + float iv[MAX_COMP]; /* interpolation vector */ + int i, k; + dword hihi; /* high quadword: hi dword */ + +#ifndef YUV + int minSum = 2000; /* big enough */ +#else + int minSum = 2000000; +#endif + int maxSum = -1; /* small enough */ + int minCol = 0; /* phoudoin: silent compiler! */ + int maxCol = 0; /* phoudoin: silent compiler! */ + + /* Our solution here is to find the darkest and brightest colors in + * the 8x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). 
+ */ + for (k = 0; k < n; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += reord[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minCol = k; + } + if (maxSum < sum) { + maxSum = sum; + maxCol = k; + } + } + + hihi = 0; /* cc-hi = "00" */ + for (i = 0; i < n_comp; i++) { + /* add in colors */ + hihi <<= 5; + hihi |= reord[maxCol][i] >> 3; + } + for (i = 0; i < n_comp; i++) { + /* add in colors */ + hihi <<= 5; + hihi |= reord[minCol][i] >> 3; + } + cc[3] = hihi; + cc[0] = cc[1] = cc[2] = 0; + + /* compute interpolation vector */ + if (minCol != maxCol) { + MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]); + } + + /* add in texels */ + for (k = N_TEXELS - 1; k >= 0; k--) { + int t = k * 3; + dword *kk = (dword *)((byte *)cc + t / 8); + int texel = n_vect + 1; /* transparent black */ + + if (!ISTBLACK(input[k])) { + if (minCol != maxCol) { + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + /* add in texel */ + kk[0] |= texel << (t & 7); + } + } else { + /* add in texel */ + kk[0] |= texel << (t & 7); + } + } +} + + +static void +fxt1_quantize_MIXED1 (dword *cc, + byte input[N_TEXELS][MAX_COMP]) +{ + const int n_vect = 2; /* highest vector number in each microtile */ + const int n_comp = 3; /* 3 components: R, G, B */ + byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ + float b, iv[MAX_COMP]; /* interpolation vector */ + int i, j, k; + qword hi; /* high quadword */ + dword lohi, lolo; /* low quadword: hi dword, lo dword */ + + int minSum; + int maxSum; + int minColL = 0, maxColL = -1; + int minColR = 0, maxColR = -1; + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). 
+ */ +#ifndef YUV + minSum = 2000; /* big enough */ +#else + minSum = 2000000; +#endif + maxSum = -1; /* small enough */ + for (k = 0; k < N_TEXELS / 2; k++) { + if (!ISTBLACK(input[k])) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minColL = k; + } + if (maxSum < sum) { + maxSum = sum; + maxColL = k; + } + } + } +#ifndef YUV + minSum = 2000; /* big enough */ +#else + minSum = 2000000; +#endif + maxSum = -1; /* small enough */ + for (; k < N_TEXELS; k++) { + if (!ISTBLACK(input[k])) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minColR = k; + } + if (maxSum < sum) { + maxSum = sum; + maxColR = k; + } + } + } + + /* left microtile */ + if (maxColL == -1) { + /* all transparent black */ + cc[0] = ~0UL; + for (i = 0; i < n_comp; i++) { + vec[0][i] = 0; + vec[1][i] = 0; + } + } else { + cc[0] = 0; + for (i = 0; i < n_comp; i++) { + vec[0][i] = input[minColL][i]; + vec[1][i] = input[maxColL][i]; + } + if (minColL != maxColL) { + /* compute interpolation vector */ + MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); + + /* add in texels */ + lolo = 0; + for (k = N_TEXELS / 2 - 1; k >= 0; k--) { + int texel = n_vect + 1; /* transparent black */ + if (!ISTBLACK(input[k])) { + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + } + /* add in texel */ + lolo <<= 2; + lolo |= texel; + } + cc[0] = lolo; + } + } + + /* right microtile */ + if (maxColR == -1) { + /* all transparent black */ + cc[1] = ~0UL; + for (i = 0; i < n_comp; i++) { + vec[2][i] = 0; + vec[3][i] = 0; + } + } else { + cc[1] = 0; + for (i = 0; i < n_comp; i++) { + vec[2][i] = input[minColR][i]; + vec[3][i] = input[maxColR][i]; + } + if 
(minColR != maxColR) { + /* compute interpolation vector */ + MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); + + /* add in texels */ + lohi = 0; + for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { + int texel = n_vect + 1; /* transparent black */ + if (!ISTBLACK(input[k])) { + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + } + /* add in texel */ + lohi <<= 2; + lohi |= texel; + } + cc[1] = lohi; + } + } + + Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ + for (j = 2 * 2 - 1; j >= 0; j--) { + for (i = 0; i < n_comp; i++) { + /* add in colors */ + Q_SHL(hi, 5); + Q_OR32(hi, vec[j][i] >> 3); + } + } + ((qword *)cc)[1] = hi; +} + + +static void +fxt1_quantize_MIXED0 (dword *cc, + byte input[N_TEXELS][MAX_COMP]) +{ + const int n_vect = 3; /* highest vector number in each microtile */ + const int n_comp = 3; /* 3 components: R, G, B */ + byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ + float b, iv[MAX_COMP]; /* interpolation vector */ + int i, j, k; + qword hi; /* high quadword */ + dword lohi, lolo; /* low quadword: hi dword, lo dword */ + + int minColL = 0, maxColL = 0; + int minColR = 0, maxColR = 0; +#if 0 + int minSum; + int maxSum; + + /* Our solution here is to find the darkest and brightest colors in + * the 4x4 tile and use those as the two representative colors. + * There are probably better algorithms to use (histogram-based). 
+ */ +#ifndef YUV + minSum = 2000; /* big enough */ +#else + minSum = 2000000; +#endif + maxSum = -1; /* small enough */ + for (k = 0; k < N_TEXELS / 2; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minColL = k; + } + if (maxSum < sum) { + maxSum = sum; + maxColL = k; + } + } + minSum = 2000; /* big enough */ + maxSum = -1; /* small enough */ + for (; k < N_TEXELS; k++) { + int sum = 0; +#ifndef YUV + for (i = 0; i < n_comp; i++) { + sum += input[k][i]; + } +#else + sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; +#endif + if (minSum > sum) { + minSum = sum; + minColR = k; + } + if (maxSum < sum) { + maxSum = sum; + maxColR = k; + } + } +#else + int minVal; + int maxVal; + int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2); + int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2); + + /* Scan the channel with max variance for lo & hi + * and use those as the two representative colors. 
+ */ + minVal = 2000; /* big enough */ + maxVal = -1; /* small enough */ + for (k = 0; k < N_TEXELS / 2; k++) { + int t = input[k][maxVarL]; + if (minVal > t) { + minVal = t; + minColL = k; + } + if (maxVal < t) { + maxVal = t; + maxColL = k; + } + } + minVal = 2000; /* big enough */ + maxVal = -1; /* small enough */ + for (; k < N_TEXELS; k++) { + int t = input[k][maxVarR]; + if (minVal > t) { + minVal = t; + minColR = k; + } + if (maxVal < t) { + maxVal = t; + maxColR = k; + } + } +#endif + + /* left microtile */ + cc[0] = 0; + for (i = 0; i < n_comp; i++) { + vec[0][i] = input[minColL][i]; + vec[1][i] = input[maxColL][i]; + } + if (minColL != maxColL) { + /* compute interpolation vector */ + MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); + + /* add in texels */ + lolo = 0; + for (k = N_TEXELS / 2 - 1; k >= 0; k--) { + int texel; + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + /* add in texel */ + lolo <<= 2; + lolo |= texel; + } + + /* funky encoding for LSB of green */ + if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) { + for (i = 0; i < n_comp; i++) { + vec[1][i] = input[minColL][i]; + vec[0][i] = input[maxColL][i]; + } + lolo = ~lolo; + } + + cc[0] = lolo; + } + + /* right microtile */ + cc[1] = 0; + for (i = 0; i < n_comp; i++) { + vec[2][i] = input[minColR][i]; + vec[3][i] = input[maxColR][i]; + } + if (minColR != maxColR) { + /* compute interpolation vector */ + MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); + + /* add in texels */ + lohi = 0; + for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { + int texel; + /* interpolate color */ + CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); + /* add in texel */ + lohi <<= 2; + lohi |= texel; + } + + /* funky encoding for LSB of green */ + if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) { + for (i = 0; i < n_comp; i++) { + vec[3][i] = input[minColR][i]; + vec[2][i] = input[maxColR][i]; + } + lohi = ~lohi; + } + + cc[1] = 
lohi; + } + + Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ + for (j = 2 * 2 - 1; j >= 0; j--) { + for (i = 0; i < n_comp; i++) { + /* add in colors */ + Q_SHL(hi, 5); + Q_OR32(hi, vec[j][i] >> 3); + } + } + ((qword *)cc)[1] = hi; +} + + +static void +fxt1_quantize (dword *cc, const byte *lines[], int comps) +{ + int trualpha; + byte reord[N_TEXELS][MAX_COMP]; + + byte input[N_TEXELS][MAX_COMP]; +#ifndef ARGB + int i; +#endif + int k, l; + + if (comps == 3) { + /* make the whole block opaque */ + memset(input, -1, sizeof(input)); + } + + /* 8 texels each line */ +#ifndef ARGB + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + for (i = 0; i < comps; i++) { + input[k + l * 4][i] = *lines[l]++; + } + } + for (; k < 8; k++) { + for (i = 0; i < comps; i++) { + input[k + l * 4 + 12][i] = *lines[l]++; + } + } + } +#else + /* H.Morii - support for ARGB inputs */ + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + input[k + l * 4][2] = *lines[l]++; + input[k + l * 4][1] = *lines[l]++; + input[k + l * 4][0] = *lines[l]++; + if (comps == 4) input[k + l * 4][3] = *lines[l]++; + } + for (; k < 8; k++) { + input[k + l * 4 + 12][2] = *lines[l]++; + input[k + l * 4 + 12][1] = *lines[l]++; + input[k + l * 4 + 12][0] = *lines[l]++; + if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++; + } + } +#endif + + /* block layout: + * 00, 01, 02, 03, 08, 09, 0a, 0b + * 10, 11, 12, 13, 18, 19, 1a, 1b + * 04, 05, 06, 07, 0c, 0d, 0e, 0f + * 14, 15, 16, 17, 1c, 1d, 1e, 1f + */ + + /* [dBorca] + * stupidity flows forth from this + */ + l = N_TEXELS; + trualpha = 0; + if (comps == 4) { + /* skip all transparent black texels */ + l = 0; + for (k = 0; k < N_TEXELS; k++) { + /* test all components against 0 */ + if (!ISTBLACK(input[k])) { + /* texel is not transparent black */ + COPY_4UBV(reord[l], input[k]); + if (reord[l][ACOMP] < (255 - ALPHA_TS)) { + /* non-opaque texel */ + trualpha = !0; + } + l++; + } + } + } + +#if 0 + if (trualpha) { + 
fxt1_quantize_ALPHA0(cc, input, reord, l); + } else if (l == 0) { + cc[0] = cc[1] = cc[2] = -1; + cc[3] = 0; + } else if (l < N_TEXELS) { + fxt1_quantize_HI(cc, input, reord, l); + } else { + fxt1_quantize_CHROMA(cc, input); + } + (void)fxt1_quantize_ALPHA1; + (void)fxt1_quantize_MIXED1; + (void)fxt1_quantize_MIXED0; +#else + if (trualpha) { + fxt1_quantize_ALPHA1(cc, input); + } else if (l == 0) { + cc[0] = cc[1] = cc[2] = ~0UL; + cc[3] = 0; + } else if (l < N_TEXELS) { + fxt1_quantize_MIXED1(cc, input); + } else { + fxt1_quantize_MIXED0(cc, input); + } + (void)fxt1_quantize_ALPHA0; + (void)fxt1_quantize_HI; + (void)fxt1_quantize_CHROMA; +#endif +} + + +TAPI int TAPIENTRY +fxt1_encode (int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride) +{ + int x, y; + const byte *data; + dword *encoded = (dword *)dest; + void *newSource = NULL; + + /* Replicate image if width is not M8 or height is not M4 */ + if ((width & 7) | (height & 3)) { + int newWidth = (width + 7) & ~7; + int newHeight = (height + 3) & ~3; + newSource = malloc(comps * newWidth * newHeight * sizeof(byte *)); + _mesa_upscale_teximage2d(width, height, newWidth, newHeight, + comps, (const byte *)source, + srcRowStride, (byte *)newSource); + source = newSource; + width = newWidth; + height = newHeight; + srcRowStride = comps * newWidth; + } + + data = (const byte *)source; + destRowStride = (destRowStride - width * 2) / 4; + for (y = 0; y < height; y += 4) { + unsigned int offs = 0 + (y + 0) * srcRowStride; + for (x = 0; x < width; x += 8) { + const byte *lines[4]; + lines[0] = &data[offs]; + lines[1] = lines[0] + srcRowStride; + lines[2] = lines[1] + srcRowStride; + lines[3] = lines[2] + srcRowStride; + offs += 8 * comps; + fxt1_quantize(encoded, lines, comps); + /* 128 bits per 8x4 block */ + encoded += 4; + } + encoded += destRowStride; + } + + if (newSource != NULL) { + free(newSource); + } + + return 0; +} + + 
+/***************************************************************************\ + * FXT1 decoder + * + * The decoder is based on GL_3DFX_texture_compression_FXT1 + * specification and serves as a concept for the encoder. +\***************************************************************************/ + + +/* lookup table for scaling 5 bit colors up to 8 bits */ +static const byte _rgb_scale_5[] = { + 0, 8, 16, 25, 33, 41, 49, 58, + 66, 74, 82, 90, 99, 107, 115, 123, + 132, 140, 148, 156, 165, 173, 181, 189, + 197, 206, 214, 222, 230, 239, 247, 255 +}; + +/* lookup table for scaling 6 bit colors up to 8 bits */ +static const byte _rgb_scale_6[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 45, 49, 53, 57, 61, + 65, 69, 73, 77, 81, 85, 89, 93, + 97, 101, 105, 109, 113, 117, 121, 125, + 130, 134, 138, 142, 146, 150, 154, 158, + 162, 166, 170, 174, 178, 182, 186, 190, + 194, 198, 202, 206, 210, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 255 +}; + + +#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31)) +#define UP5(c) _rgb_scale_5[(c) & 31] +#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] +#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n) +#define ZERO_4UBV(v) *((dword *)(v)) = 0 + + +static void +fxt1_decode_1HI (const byte *code, int t, byte *rgba) +{ + const dword *cc; + + t *= 3; + cc = (const dword *)(code + t / 8); + t = (cc[0] >> (t & 7)) & 7; + + if (t == 7) { + ZERO_4UBV(rgba); + } else { + cc = (const dword *)(code + 12); + if (t == 0) { + rgba[BCOMP] = UP5(CC_SEL(cc, 0)); + rgba[GCOMP] = UP5(CC_SEL(cc, 5)); + rgba[RCOMP] = UP5(CC_SEL(cc, 10)); + } else if (t == 6) { + rgba[BCOMP] = UP5(CC_SEL(cc, 15)); + rgba[GCOMP] = UP5(CC_SEL(cc, 20)); + rgba[RCOMP] = UP5(CC_SEL(cc, 25)); + } else { + rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15))); + rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20))); + rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 
25))); + } + rgba[ACOMP] = 255; + } +} + + +static void +fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba) +{ + const dword *cc; + dword kk; + + cc = (const dword *)code; + if (t & 16) { + cc++; + t &= 15; + } + t = (cc[0] >> (t * 2)) & 3; + + t *= 15; + cc = (const dword *)(code + 8 + t / 8); + kk = cc[0] >> (t & 7); + rgba[BCOMP] = UP5(kk); + rgba[GCOMP] = UP5(kk >> 5); + rgba[RCOMP] = UP5(kk >> 10); + rgba[ACOMP] = 255; +} + + +static void +fxt1_decode_1MIXED (const byte *code, int t, byte *rgba) +{ + const dword *cc; + int col[2][3]; + int glsb, selb; + + cc = (const dword *)code; + if (t & 16) { + t &= 15; + t = (cc[1] >> (t * 2)) & 3; + /* col 2 */ + col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6; + col[0][GCOMP] = CC_SEL(cc, 99); + col[0][RCOMP] = CC_SEL(cc, 104); + /* col 3 */ + col[1][BCOMP] = CC_SEL(cc, 109); + col[1][GCOMP] = CC_SEL(cc, 114); + col[1][RCOMP] = CC_SEL(cc, 119); + glsb = CC_SEL(cc, 126); + selb = CC_SEL(cc, 33); + } else { + t = (cc[0] >> (t * 2)) & 3; + /* col 0 */ + col[0][BCOMP] = CC_SEL(cc, 64); + col[0][GCOMP] = CC_SEL(cc, 69); + col[0][RCOMP] = CC_SEL(cc, 74); + /* col 1 */ + col[1][BCOMP] = CC_SEL(cc, 79); + col[1][GCOMP] = CC_SEL(cc, 84); + col[1][RCOMP] = CC_SEL(cc, 89); + glsb = CC_SEL(cc, 125); + selb = CC_SEL(cc, 1); + } + + if (CC_SEL(cc, 124) & 1) { + /* alpha[0] == 1 */ + + if (t == 3) { + ZERO_4UBV(rgba); + } else { + if (t == 0) { + rgba[BCOMP] = UP5(col[0][BCOMP]); + rgba[GCOMP] = UP5(col[0][GCOMP]); + rgba[RCOMP] = UP5(col[0][RCOMP]); + } else if (t == 2) { + rgba[BCOMP] = UP5(col[1][BCOMP]); + rgba[GCOMP] = UP6(col[1][GCOMP], glsb); + rgba[RCOMP] = UP5(col[1][RCOMP]); + } else { + rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2; + rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2; + rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2; + } + rgba[ACOMP] = 255; + } + } else { + /* alpha[0] == 0 */ + + if (t == 0) { + rgba[BCOMP] = UP5(col[0][BCOMP]); + rgba[GCOMP] = 
UP6(col[0][GCOMP], glsb ^ selb); + rgba[RCOMP] = UP5(col[0][RCOMP]); + } else if (t == 3) { + rgba[BCOMP] = UP5(col[1][BCOMP]); + rgba[GCOMP] = UP6(col[1][GCOMP], glsb); + rgba[RCOMP] = UP5(col[1][RCOMP]); + } else { + rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP])); + rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb), + UP6(col[1][GCOMP], glsb)); + rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP])); + } + rgba[ACOMP] = 255; + } +} + + +static void +fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba) +{ + const dword *cc; + + cc = (const dword *)code; + if (CC_SEL(cc, 124) & 1) { + /* lerp == 1 */ + int col0[4]; + + if (t & 16) { + t &= 15; + t = (cc[1] >> (t * 2)) & 3; + /* col 2 */ + col0[BCOMP] = (*(const dword *)(code + 11)) >> 6; + col0[GCOMP] = CC_SEL(cc, 99); + col0[RCOMP] = CC_SEL(cc, 104); + col0[ACOMP] = CC_SEL(cc, 119); + } else { + t = (cc[0] >> (t * 2)) & 3; + /* col 0 */ + col0[BCOMP] = CC_SEL(cc, 64); + col0[GCOMP] = CC_SEL(cc, 69); + col0[RCOMP] = CC_SEL(cc, 74); + col0[ACOMP] = CC_SEL(cc, 109); + } + + if (t == 0) { + rgba[BCOMP] = UP5(col0[BCOMP]); + rgba[GCOMP] = UP5(col0[GCOMP]); + rgba[RCOMP] = UP5(col0[RCOMP]); + rgba[ACOMP] = UP5(col0[ACOMP]); + } else if (t == 3) { + rgba[BCOMP] = UP5(CC_SEL(cc, 79)); + rgba[GCOMP] = UP5(CC_SEL(cc, 84)); + rgba[RCOMP] = UP5(CC_SEL(cc, 89)); + rgba[ACOMP] = UP5(CC_SEL(cc, 114)); + } else { + rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79))); + rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84))); + rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89))); + rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114))); + } + } else { + /* lerp == 0 */ + + if (t & 16) { + cc++; + t &= 15; + } + t = (cc[0] >> (t * 2)) & 3; + + if (t == 3) { + ZERO_4UBV(rgba); + } else { + dword kk; + cc = (const dword *)code; + rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13)); + t *= 15; + cc = (const dword *)(code + 8 + t / 8); + kk = cc[0] >> (t & 7); + 
rgba[BCOMP] = UP5(kk); + rgba[GCOMP] = UP5(kk >> 5); + rgba[RCOMP] = UP5(kk >> 10); + } + } +} + + +TAPI void TAPIENTRY +fxt1_decode_1 (const void *texture, int stride, + int i, int j, byte *rgba) +{ + static void (*decode_1[]) (const byte *, int, byte *) = { + fxt1_decode_1HI, /* cc-high = "00?" */ + fxt1_decode_1HI, /* cc-high = "00?" */ + fxt1_decode_1CHROMA, /* cc-chroma = "010" */ + fxt1_decode_1ALPHA, /* alpha = "011" */ + fxt1_decode_1MIXED, /* mixed = "1??" */ + fxt1_decode_1MIXED, /* mixed = "1??" */ + fxt1_decode_1MIXED, /* mixed = "1??" */ + fxt1_decode_1MIXED /* mixed = "1??" */ + }; + + const byte *code = (const byte *)texture + + ((j / 4) * (stride / 8) + (i / 8)) * 16; + int mode = CC_SEL(code, 125); + int t = i & 7; + + if (t & 4) { + t += 12; + } + t += (j & 3) * 4; + + decode_1[mode](code, t, rgba); + +#if VERBOSE + { + extern int cc_chroma; + extern int cc_alpha; + extern int cc_high; + extern int cc_mixed; + static int *cctype[] = { + &cc_high, + &cc_high, + &cc_chroma, + &cc_alpha, + &cc_mixed, + &cc_mixed, + &cc_mixed, + &cc_mixed + }; + (*cctype[mode])++; + } +#endif +} diff --git a/Source/GlideHQ/tc-1.1+/fxt1.h b/Source/GlideHQ/tc-1.1+/fxt1.h new file mode 100644 index 000000000..c2919bbac --- /dev/null +++ b/Source/GlideHQ/tc-1.1+/fxt1.h @@ -0,0 +1,38 @@ +/* + * FXT1 codec + * Version: 1.1 + * + * Copyright (C) 2004 Daniel Borca All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef FXT1_H_included +#define FXT1_H_included + +TAPI int TAPIENTRY +fxt1_encode (int width, int height, int comps, + const void *source, int srcRowStride, + void *dest, int destRowStride); + +TAPI void TAPIENTRY +fxt1_decode_1 (const void *texture, int stride /* in pixels */, + int i, int j, byte *rgba); + +#endif diff --git a/Source/GlideHQ/tc-1.1+/internal.h b/Source/GlideHQ/tc-1.1+/internal.h new file mode 100644 index 000000000..28ace4b45 --- /dev/null +++ b/Source/GlideHQ/tc-1.1+/internal.h @@ -0,0 +1,137 @@ +/* + * Texture compression + * Version: 1.0 + * + * Copyright (C) 2004 Daniel Borca All Rights Reserved. + * + * this is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * this is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + + +#ifndef INTERNAL_H_included +#define INTERNAL_H_included + +/*****************************************************************************\ + * DLL stuff +\*****************************************************************************/ + +#ifdef __WIN32__ +#define TAPI __declspec(dllexport) +#define TAPIENTRY /*__stdcall*/ +#else +#define TAPI +#define TAPIENTRY +#endif + + +/*****************************************************************************\ + * 64bit types on 32bit machine +\*****************************************************************************/ + +#if (defined(__GNUC__) && !defined(__cplusplus)) || defined(WIN32) + +typedef unsigned long long qword; + +#define Q_MOV32(a, b) a = b +#define Q_OR32(a, b) a |= b +#define Q_SHL(a, c) a <<= c + +#else /* !__GNUC__ */ + +typedef struct { + dword lo, hi; +} qword; + +#define Q_MOV32(a, b) a.lo = b +#define Q_OR32(a, b) a.lo |= b +#define Q_SHL(a, c) \ + do { \ + if ((c) >= 32) { \ + a.hi = a.lo << ((c) - 32); \ + a.lo = 0; \ + } else { \ + a.hi = (a.hi << (c)) | (a.lo >> (32 - (c)));\ + a.lo <<= c; \ + } \ + } while (0) + +#endif /* !__GNUC__ */ + + +/*****************************************************************************\ + * Config +\*****************************************************************************/ + +#define RCOMP 0 +#define GCOMP 1 +#define BCOMP 2 +#define ACOMP 3 + +/*****************************************************************************\ + * Metric +\*****************************************************************************/ + +#define F(i) (float)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */ +#define SAFECDOT 1 /* for paranoids */ + +#define MAKEIVEC(NV, NC, IV, B, V0, V1) \ + do { \ + /* compute interpolation vector */\ + float d2 = 0.0F; \ + float rd2; \ + \ + for (i = 0; i < NC; i++) { \ + IV[i] = (V1[i] - V0[i]) * F(i);\ + d2 += IV[i] * IV[i]; \ + } \ + rd2 = (float)NV / d2; \ + B = 0; \ + for (i = 0; i < NC; i++) { \ + IV[i] *= 
F(i); \ + B -= IV[i] * V0[i]; \ + IV[i] *= rd2; \ + } \ + B = B * rd2 + 0.5F; \ + } while (0) + +#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\ + do { \ + float dot = 0.0F; \ + for (i = 0; i < NC; i++) { \ + dot += V[i] * IV[i]; \ + } \ + TEXEL = (int)(dot + B); \ + if (SAFECDOT) { \ + if (TEXEL < 0) { \ + TEXEL = 0; \ + } else if (TEXEL > NV) { \ + TEXEL = NV; \ + } \ + } \ + } while (0) + + +/*****************************************************************************\ + * Utility functions +\*****************************************************************************/ + +void +_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight, + unsigned int outWidth, unsigned int outHeight, + unsigned int comps, + const byte *src, int srcRowStride, + unsigned char *dest); + +#endif diff --git a/Source/GlideHQ/tc-1.1+/texstore.c b/Source/GlideHQ/tc-1.1+/texstore.c new file mode 100644 index 000000000..2eb0306fe --- /dev/null +++ b/Source/GlideHQ/tc-1.1+/texstore.c @@ -0,0 +1,93 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/*
 * Fill an outWidth x outHeight image by repeating (tiling) an
 * inWidth x inHeight source image, starting at the top-left corner.
 *
 * comps        - bytes per texel
 * src          - source texels; rows are srcRowStride bytes apart
 * dest         - tightly packed output of outWidth * outHeight * comps bytes
 *
 * The source coordinates wrap around, so the output may be any size that is
 * at least as large as the input (for example 1x1 -> 8x4); no source texel
 * outside inWidth x inHeight is ever read. Nothing is written when the
 * source is empty.
 *
 * `byte` is a typedef for unsigned char (types.h); the type is spelled out
 * here the way the prototype in internal.h already does for dest.
 */
void
_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
                          unsigned int outWidth, unsigned int outHeight,
                          unsigned int comps,
                          const unsigned char *src, int srcRowStride,
                          unsigned char *dest)
{
    unsigned int i, j, k;
    unsigned int srcRow, srcCol;

    assert(outWidth >= inWidth);
    assert(outHeight >= inHeight);

    if (inWidth == 0 || inHeight == 0) {
        /* nothing to replicate */
        return;
    }

    /* wrapping counters stand in for i % inHeight and j % inWidth */
    srcRow = 0;
    for (i = 0; i < outHeight; i++) {
        const unsigned char *in = src + (long)srcRow * srcRowStride;
        unsigned char *out = dest + (unsigned long)i * outWidth * comps;

        srcCol = 0;
        for (j = 0; j < outWidth; j++) {
            for (k = 0; k < comps; k++) {
                out[j * comps + k] = in[srcCol * comps + k];
            }
            if (++srcCol == inWidth) {
                srcCol = 0;
            }
        }
        if (++srcRow == inHeight) {
            srcRow = 0;
        }
    }
}
#ifndef TYPES_H_included
#define TYPES_H_included

/*****************************************************************************\
 * Unsigned integer aliases used throughout the texture codecs.
 * The widths in the comments are the ones the code relies on; they are not
 * enforced here.
\*****************************************************************************/
typedef unsigned char  byte;    /* 8-bit */
typedef unsigned short word;    /* 16-bit */
typedef unsigned int   dword;   /* 32-bit */

#endif
+ */ + + +#include + +#include "types.h" +#include "internal.h" +#include "dxtn.h" + + +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 +#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 + + +TAPI void TAPIENTRY +fetch_2d_texel_rgb_dxt1 (int texImage_RowStride, + const byte *texImage_Data, + int i, int j, + byte *texel) +{ + dxt1_rgb_decode_1(texImage_Data, texImage_RowStride, i, j, texel); +} + + +TAPI void TAPIENTRY +fetch_2d_texel_rgba_dxt1 (int texImage_RowStride, + const byte *texImage_Data, + int i, int j, + byte *texel) +{ + dxt1_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel); +} + + +TAPI void TAPIENTRY +fetch_2d_texel_rgba_dxt3 (int texImage_RowStride, + const byte *texImage_Data, + int i, int j, + byte *texel) +{ + dxt3_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel); +} + + +TAPI void TAPIENTRY +fetch_2d_texel_rgba_dxt5 (int texImage_RowStride, + const byte *texImage_Data, + int i, int j, + byte *texel) +{ + dxt5_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel); +} + + +TAPI void TAPIENTRY +tx_compress_dxtn (int srccomps, int width, int height, + const byte *source, int destformat, byte *dest, + int destRowStride) +{ + int srcRowStride = width * srccomps; + int rv; + + switch (destformat) { + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + rv = dxt1_rgb_encode(width, height, srccomps, + source, srcRowStride, + dest, destRowStride); + break; + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + rv = dxt1_rgba_encode(width, height, srccomps, + source, srcRowStride, + dest, destRowStride); + break; + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + rv = dxt3_rgba_encode(width, height, srccomps, + source, srcRowStride, + dest, destRowStride); + break; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + rv = dxt5_rgba_encode(width, height, srccomps, + source, srcRowStride, + dest, destRowStride); + break; + default: + assert(0); + } + + /*return rv;*/ +}