diff --git a/Source/GlideHQ/Ext_TxFilter.cpp b/Source/GlideHQ/Ext_TxFilter.cpp
new file mode 100644
index 000000000..f5dcc7543
--- /dev/null
+++ b/Source/GlideHQ/Ext_TxFilter.cpp
@@ -0,0 +1,27 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <memory.h>
+#include <stdlib.h>
+#include "Ext_TxFilter.h"
+
diff --git a/Source/GlideHQ/Ext_TxFilter.h b/Source/GlideHQ/Ext_TxFilter.h
new file mode 100644
index 000000000..54cb8b68b
--- /dev/null
+++ b/Source/GlideHQ/Ext_TxFilter.h
@@ -0,0 +1,212 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __EXT_TXFILTER_H__
+#define __EXT_TXFILTER_H__
+
+#ifdef WIN32 /* platform shims: dynamic loading and working-directory calls */
+#include <windows.h>
+#define TXHMODULE HMODULE
+#define DLOPEN(a) LoadLibraryW(a)
+#define DLCLOSE(a) FreeLibrary(a)
+#define DLSYM(a, b) GetProcAddress(a, b)
+#define GETCWD(a, b) GetCurrentDirectoryW(a, b)
+#define CHDIR(a) SetCurrentDirectoryW(a)
+#else
+#include <iostream>
+#include <dlfcn.h>
+#include <unistd.h> /* getcwd(), chdir(); not guaranteed to come in via <iostream> */
+#define MAX_PATH 4095
+#define TXHMODULE void*
+#define DLOPEN(a) dlopen(a, RTLD_LAZY|RTLD_GLOBAL)
+#define DLCLOSE(a) dlclose(a)
+#define DLSYM(a, b) dlsym(a, b)
+#define GETCWD(a, b) getcwd(b, a) /* (size, buffer) argument order, as in the WIN32 branch */
+#define CHDIR(a) chdir(a)
+#endif
+/* 64-bit integer names differ per compiler; boolean is one byte on both platforms */
+#ifdef WIN32
+typedef __int64 int64;
+typedef unsigned __int64 uint64;
+#else
+typedef long long int64;
+typedef unsigned long long uint64;
+#endif
+typedef unsigned char boolean;
+
+#define NO_OPTIONS          0x00000000
+/* bits 0-7: filter selection; low nibble = smooth filter, high nibble = sharp filter */
+#define FILTER_MASK         0x000000ff
+#define NO_FILTER           0x00000000
+#define SMOOTH_FILTER_MASK  0x0000000f
+#define NO_SMOOTH_FILTER    0x00000000
+#define SMOOTH_FILTER_1     0x00000001
+#define SMOOTH_FILTER_2     0x00000002
+#define SMOOTH_FILTER_3     0x00000003
+#define SMOOTH_FILTER_4     0x00000004
+#define SHARP_FILTER_MASK   0x000000f0
+#define NO_SHARP_FILTER     0x00000000
+#define SHARP_FILTER_1      0x00000010
+#define SHARP_FILTER_2      0x00000020
+/* bits 8-11: enhancement selector (consecutive values under the mask, not bit flags) */
+#define ENHANCEMENT_MASK    0x00000f00
+#define NO_ENHANCEMENT      0x00000000
+#define X2_ENHANCEMENT      0x00000100
+#define X2SAI_ENHANCEMENT   0x00000200
+#define HQ2X_ENHANCEMENT    0x00000300
+#define LQ2X_ENHANCEMENT    0x00000400
+#define HQ4X_ENHANCEMENT    0x00000500
+#define HQ2XS_ENHANCEMENT   0x00000600
+#define LQ2XS_ENHANCEMENT   0x00000700
+/* bits 12-15: texture compression selector (consecutive values under the mask) */
+#define COMPRESSION_MASK    0x0000f000
+#define NO_COMPRESSION      0x00000000
+#define FXT1_COMPRESSION    0x00001000
+#define NCC_COMPRESSION     0x00002000
+#define S3TC_COMPRESSION    0x00003000
+/* bits 16-19: hi-res texture selector (consecutive values under the mask) */
+#define HIRESTEXTURES_MASK  0x000f0000
+#define NO_HIRESTEXTURES    0x00000000
+#define GHQ_HIRESTEXTURES   0x00010000
+#define RICE_HIRESTEXTURES  0x00020000
+#define JABO_HIRESTEXTURES  0x00030000
+/* bits 20-31: independent option flags, one bit each */
+#define COMPRESS_TEX        0x00100000
+#define COMPRESS_HIRESTEX   0x00200000
+#define GZ_TEXCACHE         0x00400000
+#define GZ_HIRESTEXCACHE    0x00800000
+#define DUMP_TEXCACHE       0x01000000
+#define DUMP_HIRESTEXCACHE  0x02000000
+#define TILE_HIRESTEX       0x04000000
+#define UNDEFINED_0         0x08000000
+#define FORCE16BPP_HIRESTEX 0x10000000
+#define FORCE16BPP_TEX      0x20000000
+#define LET_TEXARTISTS_FLY  0x40000000 /* a little freedom for texture artists */
+#define DUMP_TEX            0x80000000
+
+#ifndef __GLIDE_H__ /* GLIDE3: fallback definitions, skipped when __GLIDE_H__ is already defined */
+/* from 3Dfx Interactive Inc. glide.h */
+#define GR_TEXFMT_ALPHA_8           0x2
+#define GR_TEXFMT_INTENSITY_8       0x3
+
+#define GR_TEXFMT_ALPHA_INTENSITY_44 0x4
+#define GR_TEXFMT_P_8                0x5
+
+#define GR_TEXFMT_RGB_565            0xa
+#define GR_TEXFMT_ARGB_1555          0xb
+#define GR_TEXFMT_ARGB_4444          0xc
+#define GR_TEXFMT_ALPHA_INTENSITY_88 0xd
+
+/* from 3Dfx Interactive Inc. g3ext.h */
+#define GR_TEXFMT_ARGB_CMP_FXT1      0x11
+
+#define GR_TEXFMT_ARGB_8888          0x12
+
+#define GR_TEXFMT_ARGB_CMP_DXT1      0x16
+#define GR_TEXFMT_ARGB_CMP_DXT3      0x18
+#define GR_TEXFMT_ARGB_CMP_DXT5      0x1A
+#endif /* GLIDE3 */
+
+struct GHQTexInfo { /* passed as the "output" argument of ext_ghq_txfilter() and ext_ghq_hirestex() */
+  unsigned char *data; /* texture pixels; who owns/frees this buffer is not visible here -- TODO confirm */
+  int width;
+  int height;
+  unsigned short format; /* presumably one of the GR_TEXFMT_* values -- confirm */
+  /* LOD range and aspect ratio; the names suggest log2 encodings as in Glide -- confirm */
+  int smallLodLog2;
+  int largeLodLog2;
+  int aspectRatioLog2;
+  /* tiling information; presumably related to the TILE_HIRESTEX option -- confirm */
+  int tiles;
+  int untiled_width;
+  int untiled_height;
+  /* NOTE(review): looks like a flag marking hi-res replacement textures -- confirm */
+  unsigned char is_hires_tex;
+};
+
+/* Callback to display hires texture info.
+ * Gonetz <gonetz(at)ngs.ru>
+ *
+ * Example matching dispInfoFuncExt (wide-character format string):
+ * void DispInfo(const wchar_t *format, ...)
+ * {
+ *   va_list args;
+ *   wchar_t buf[INFO_BUF];
+ *
+ *   va_start(args, format);
+ *   vswprintf(buf, INFO_BUF, format, args);
+ *   va_end(args);
+ *   fputws(buf, stdout);   (never pass buf itself as a format string)
+ * }
+ */
+#define INFO_BUF 4095
+typedef void (*dispInfoFuncExt)(const wchar_t *format, ...);
+
+#ifndef TXFILTER_DLL /* these prototypes are hidden when TXFILTER_DLL is defined */
+boolean ext_ghq_init(int maxwidth, /* maximum texture width supported by hardware */
+                     int maxheight,/* maximum texture height supported by hardware */
+                     int maxbpp,   /* maximum texture bpp supported by hardware */
+                     int options,  /* options; presumably the flag bits defined in this header -- confirm */
+                     int cachesize,/* cache textures to system memory */
+                     wchar_t *path,   /* plugin directory. must be smaller than MAX_PATH */
+                     wchar_t *ident,  /* name of ROM. must be no longer than 64 characters. */
+                     dispInfoFuncExt callback /* callback function to display info */
+                     );
+
+void ext_ghq_shutdown(void);
+
+boolean ext_ghq_txfilter(unsigned char *src,        /* input texture */
+                         int srcwidth,              /* width of input texture */
+                         int srcheight,             /* height of input texture */
+                         unsigned short srcformat,  /* format of input texture */
+                         uint64 g64crc,             /* glide64 crc */
+                         GHQTexInfo *info           /* output */
+                         );
+
+boolean ext_ghq_hirestex(uint64 g64crc,             /* glide64 crc */
+                         uint64 r_crc64,            /* checksum hi:palette low:texture */
+                         unsigned short *palette,   /* palette for CI textures */
+                         GHQTexInfo *info           /* output */
+                         );
+
+uint64 ext_ghq_checksum(unsigned char *src, /* input texture */
+                        int width,          /* width of texture */
+                        int height,         /* height of texture */
+                        int size,           /* type of texture pixel */
+                        int rowStride,      /* row stride in bytes */
+                        unsigned char *palette /* palette */
+                        );
+
+boolean ext_ghq_dmptx(unsigned char *src,   /* input texture (must be in 3Dfx Glide format) */
+                      int width,            /* width of texture */
+                      int height,           /* height of texture */
+                      int rowStridePixel,   /* row stride of input texture in pixels */
+                      unsigned short gfmt,  /* glide format of input texture */
+                      unsigned short n64fmt,/* N64 format hi:format low:size */
+                      uint64 r_crc64        /* checksum hi:palette low:texture */
+                      );
+
+boolean ext_ghq_reloadhirestex();
+#endif /* TXFILTER_DLL */
+
+#endif /* __EXT_TXFILTER_H__ */
diff --git a/Source/GlideHQ/GlideHQ.vcproj b/Source/GlideHQ/GlideHQ.vcproj
new file mode 100644
index 000000000..4e5d40162
--- /dev/null
+++ b/Source/GlideHQ/GlideHQ.vcproj
@@ -0,0 +1,295 @@
+<?xml version="1.0" encoding="shift_jis"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="GlideHQ"
+	ProjectGUID="{2DADDAA5-0F57-46ED-A974-747908DDC7F3}"
+	RootNamespace="GlideHQ"
+	TargetFrameworkVersion="0"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(SolutionDir)PropertySheets/Win32.Debug.vsprops"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/src&quot;;inc"
+				UsePrecompiledHeader="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(SolutionDir)PropertySheets/Win32.Release.vsprops"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="inc"
+				UsePrecompiledHeader="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath="Ext_TxFilter.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_2xsai.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_hq2x.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_hq4x.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxCache.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxDbg.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxFilter.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxFilterExport.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxHiResCache.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxImage.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxQuantize.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxReSample.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxTexCache.cpp"
+				>
+			</File>
+			<File
+				RelativePath="TxUtil.cpp"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath="Ext_TxFilter.h"
+				>
+			</File>
+			<File
+				RelativePath="Internal.h"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters.h"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_2xsai.h"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_hq2x.h"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_hq4x.h"
+				>
+			</File>
+			<File
+				RelativePath="TextureFilters_lq2x.h"
+				>
+			</File>
+			<File
+				RelativePath="TxCache.h"
+				>
+			</File>
+			<File
+				RelativePath="TxDbg.h"
+				>
+			</File>
+			<File
+				RelativePath="TxFilter.h"
+				>
+			</File>
+			<File
+				RelativePath="TxHiResCache.h"
+				>
+			</File>
+			<File
+				RelativePath="TxImage.h"
+				>
+			</File>
+			<File
+				RelativePath="TxQuantize.h"
+				>
+			</File>
+			<File
+				RelativePath="TxTexCache.h"
+				>
+			</File>
+			<File
+				RelativePath="TxUtil.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="tc-1.1+"
+			>
+			<File
+				RelativePath=".\tc-1.1+\dxtn.c"
+				>
+			</File>
+			<File
+				RelativePath=".\tc-1.1+\fxt1.c"
+				>
+			</File>
+			<File
+				RelativePath=".\tc-1.1+\fxt1.h"
+				>
+			</File>
+			<File
+				RelativePath=".\tc-1.1+\internal.h"
+				>
+			</File>
+			<File
+				RelativePath=".\tc-1.1+\texstore.c"
+				>
+			</File>
+			<File
+				RelativePath=".\tc-1.1+\types.h"
+				>
+			</File>
+			<File
+				RelativePath=".\tc-1.1+\wrapper.c"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/Source/GlideHQ/README.txt b/Source/GlideHQ/README.txt
new file mode 100644
index 000000000..c903e5736
--- /dev/null
+++ b/Source/GlideHQ/README.txt
@@ -0,0 +1,94 @@
+/*
+ * GlideHQ (Texture enhancer library for Glide64)
+ * Version:  1.5
+ *
+ * Copyright (C) 2007  Hiroshi Morii aka KoolSmoky   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+About:
+This is a realtime texture enhancer library with hi-resolution texture
+pack support for Glide64 (http://glide64.emuxhaven.net). Traditional and
+non-traditional techniques have been used to achieve speed and high image
+quality even on a 9 year old 3Dfx Voodoo2.
+
+Although the 3Dfx Glide3x texture format naming conventions are used, the
+library can be expanded for generic use.
+
+Supported:
+OS: 32bit Linux and MS Windows
+Enhancers: Hq4x, Hq2x, Hq2xS, Lq2x, Lq2xS, Super2xSai, x2
+Filters: Smooth (1,2,3,4), Sharp (1,2)
+Compressors: FXT1, S3TC
+Input formats:  GR_TEXFMT_ALPHA_8,
+                GR_TEXFMT_RGB_565,
+                GR_TEXFMT_ARGB_1555,
+                GR_TEXFMT_ARGB_4444,
+                GR_TEXFMT_ARGB_8888,
+                GR_TEXFMT_ALPHA_INTENSITY_44,
+                GR_TEXFMT_ALPHA_INTENSITY_88
+Output formats: Same as input unless compression or hires packs are used.
+Hires texture packs: Rice format (Jabo and GlideHQ format coming later)
+
+Acknowledgments:
+I hope you enjoy GlideHQ (texture enhancer library for Glide64). Greatest
+thanks to Gonetz for making this happen in his busy time. We've rushed
+everything to share the eye-candy with all of you N64 emulation fans. I
+would also like to thank a great friend of mine, Daniel Borca for providing
+the texture compression code, Maxim Stepin (hq2x 4x), and Derek Liauw Kie Fa
+(2xSaI) for the filtering engines, Rice for his N64 graphics plugin source
+code, and Mudlord for the hq2xS lq2xS code. GlideHQ also uses the boost C++
+libraries, zlib general purpose compression library, and the Portable Network
+Graphics library. Thanks to all the developers for making them available. And
+special thanks to the Glide64 beta testing crew. Without their feedback
+this library would not have seen daylight. Thank you all.
+
+The source code for GlideHQ is released in hopes that it will be improved.
+I know the coding is not up to par after so many late-night caffeine boosts.
+If you have suggestions or modifications, please feel free to post them on
+the Glide64 forum at emuxhaven.
+
+Porting the library to other platforms should not be so hard. The coding is
+done with cross platform compatibility in mind and will build with GCC and
+GNU make. Currently supported are 32bit Linux and MS Windows.
+
+If you are looking for driver updates for your 3Dfx Interactive Inc. gfx 
+card, grab them from the forums at http://www.3dfxzone.it/enboard/
+Unbelievable as it seems, drivers are still being updated 6 years after
+3Dfx's demise.
+
+I know N64 rules, anyone up for PSX? :))
+
+-KoolSmoky
+
+References:
+[1] R.W. Floyd & L. Steinberg, An adaptive algorithm for spatial grey scale,
+    Proceedings of the Society of Information Display 17, pp75-77, 1976
+[2] Ken Turkowski, Filters for Common Resampling Tasks, Apple Computer 1990
+    http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf
+[3] Don P. Mitchell and Arun N. Netravali, Reconstruction Filters in Computer
+    Graphics, SIGGRAPH '88, Proceedings of the 15th annual conference on
+    Computer graphics and interactive techniques, pp221-228, 1988
+[4] J. F. Kaiser and W. A. Reed, Data smoothing using low-pass digital
+    filters, Rev. Sci. instrum. 48 (11), pp1447-1457, 1977
+[5] Maxim Stepin, hq4x Magnification Filter, http://www.hiend3d.com/hq4x.html
+[6] Derek Liauw Kie Fa, 2xSaI, http://elektron.its.tudelft.nl/~dalikifa
+[7] Dirk Stevens, Eagle engine http://www.retrofx.com/rfxtech.html
+[8] 3DFX_texture_compression_FXT1 and EXT_texture_compression_s3tc extension
+    specs from the OpenGL Extension Registry. http://oss.sgi.com/projects/
+    ogl-sample/registry/
diff --git a/Source/GlideHQ/TextureFilters.cpp b/Source/GlideHQ/TextureFilters.cpp
new file mode 100644
index 000000000..e0e2d9b43
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters.cpp
@@ -0,0 +1,715 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+#include <string.h>
+#include "TextureFilters.h"
+
+/************************************************************************/
+/* 2X filters                                                           */
+/************************************************************************/
+
+#define DWORD_MAKE(r, g, b, a)   ((uint32) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))) /* packs 0xAARRGGBB; arguments are not masked, so each must already fit in 8 bits */
+#define WORD_MAKE(r, g, b, a)   ((uint16) (((a) << 12) | ((r) << 8) | ((g) << 4) | (b))) /* packs 0xARGB; arguments are not masked, so each must already fit in 4 bits */
+
+// Basic 2x R8G8B8A8 filter with interpolation
+//
+// Each source pixel P expands into a 2x2 block of destination pixels. With
+// E = the pixel right of P, S = the pixel below P and SE = the pixel
+// diagonally below-right of P, the block is:
+//
+//     +--------------+----------------------+
+//     | P            | mean(P, E)           |
+//     +--------------+----------------------+
+//     | mean(P, S)   | mean(P, E, S, SE)    |
+//     +--------------+----------------------+
+//
+// Means are taken separately for each 8-bit channel and round down.
+//
+// Borders: on the last column E falls back to P and SE to S; on the last
+// row S falls back to P and SE to E. Averaging a pixel with itself returns
+// that pixel, so border blocks only blend the neighbours that really exist.
+
+// Mean of two packed 32-bit pixels, computed per 8-bit channel.
+// The result uses the same channel layout as the inputs.
+static uint32 Texture2x_32_Mean2(uint32 p, uint32 q)
+{
+  uint32 packed = 0;
+
+  for (int shift = 0; shift < 32; shift += 8)
+  {
+    const uint32 lhs = (p >> shift) & 0xFF;
+    const uint32 rhs = (q >> shift) & 0xFF;
+
+    // the quotient is at most 0xFF, so it cannot spill into the next channel
+    packed |= ((lhs + rhs) / 2) << shift;
+  }
+
+  return packed;
+}
+
+// Mean of four packed 32-bit pixels, computed per 8-bit channel.
+// The result uses the same channel layout as the inputs.
+static uint32 Texture2x_32_Mean4(uint32 p, uint32 q, uint32 r, uint32 s)
+{
+  uint32 packed = 0;
+
+  for (int shift = 0; shift < 32; shift += 8)
+  {
+    const uint32 total = ((p >> shift) & 0xFF)
+                       + ((q >> shift) & 0xFF)
+                       + ((r >> shift) & 0xFF)
+                       + ((s >> shift) & 0xFF);
+
+    // the quotient is at most 0xFF, so it cannot spill into the next channel
+    packed |= (total / 4) << shift;
+  }
+
+  return packed;
+}
+
+/*
+ * Texture2x_32
+ * Writes a 2x-enlarged copy of the source image to the destination.
+ *
+ *   srcPtr    first byte of the source image (32 bits per pixel)
+ *   srcPitch  distance between the starts of two source rows, in bytes
+ *   dstPtr    first byte of the destination image; 2*width pixels are
+ *             written on each of 2*height rows
+ *   dstPitch  distance between the starts of two destination rows, in bytes
+ *   width     source width in pixels
+ *   height    source height in pixels
+ *
+ * Nothing is written when width or height is 0.
+ *
+ * See the block diagram above for what goes into each output pixel.
+ */
+void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  // unsigned copies of the dimensions, used for all index arithmetic
+  const uint32 cols = width;
+  const uint32 rows = height;
+
+  for (uint32 row = 0; row < rows; row++)
+  {
+    const bool hasRowBelow = (row + 1 < rows);
+
+    // source: the current row and the one under it (the current row again
+    // when this is the bottom row, so that S and SE clamp to the edge)
+    const uint32 *cur = (const uint32*)(srcPtr + row*srcPitch);
+    const uint32 *below = hasRowBelow ? (const uint32*)(srcPtr + (row+1)*srcPitch) : cur;
+
+    // destination: every source row produces two output rows
+    uint32 *outUpper = (uint32*)(dstPtr + (row*2)*dstPitch);
+    uint32 *outLower = (uint32*)(dstPtr + (row*2+1)*dstPitch);
+
+    for (uint32 col = 0; col < cols; col++)
+    {
+      // column of the right-hand neighbour, clamped on the last column
+      const uint32 eastCol = (col + 1 < cols) ? col + 1 : col;
+
+      const uint32 p  = cur[col];
+      const uint32 e  = cur[eastCol];
+      const uint32 s  = below[col];
+      const uint32 se = below[eastCol];
+
+      // upper output row: P, mean(P, E)
+      outUpper[col*2]   = p;
+      outUpper[col*2+1] = Texture2x_32_Mean2(p, e);
+
+      // lower output row: mean(P, S), mean(P, E, S, SE)
+      outLower[col*2]   = Texture2x_32_Mean2(p, s);
+      outLower[col*2+1] = Texture2x_32_Mean4(p, e, s, se);
+    }
+  }
+}
+
+#if !_16BPP_HACK
+// Basic 2x R4G4B4A4 filter with interpolation
+//
+// Each source pixel P expands into a 2x2 block of destination pixels. With
+// E = the pixel right of P, S = the pixel below P and SE = the pixel
+// diagonally below-right of P, the block is:
+//
+//     +--------------+----------------------+
+//     | P            | mean(P, E)           |
+//     +--------------+----------------------+
+//     | mean(P, S)   | mean(P, E, S, SE)    |
+//     +--------------+----------------------+
+//
+// Means are taken separately for each 4-bit channel and round down.
+//
+// Borders: on the last column E falls back to P and SE to S; on the last
+// row S falls back to P and SE to E. Averaging a pixel with itself returns
+// that pixel, so border blocks only blend the neighbours that really exist.
+
+// Mean of two packed 16-bit pixels, computed per 4-bit channel.
+// The result uses the same channel layout as the inputs.
+static uint16 Texture2x_16_Mean2(uint32 p, uint32 q)
+{
+  uint32 packed = 0;
+
+  for (int shift = 0; shift < 16; shift += 4)
+  {
+    const uint32 lhs = (p >> shift) & 0xF;
+    const uint32 rhs = (q >> shift) & 0xF;
+
+    // the quotient is at most 0xF, so it cannot spill into the next channel
+    packed |= ((lhs + rhs) / 2) << shift;
+  }
+
+  return (uint16)packed;
+}
+
+// Mean of four packed 16-bit pixels, computed per 4-bit channel.
+// The result uses the same channel layout as the inputs.
+static uint16 Texture2x_16_Mean4(uint32 p, uint32 q, uint32 r, uint32 s)
+{
+  uint32 packed = 0;
+
+  for (int shift = 0; shift < 16; shift += 4)
+  {
+    const uint32 total = ((p >> shift) & 0xF)
+                       + ((q >> shift) & 0xF)
+                       + ((r >> shift) & 0xF)
+                       + ((s >> shift) & 0xF);
+
+    // the quotient is at most 0xF, so it cannot spill into the next channel
+    packed |= (total / 4) << shift;
+  }
+
+  return (uint16)packed;
+}
+
+/*
+ * Texture2x_16
+ * Writes a 2x-enlarged copy of the source image to the destination.
+ *
+ *   srcPtr    first byte of the source image (16 bits per pixel)
+ *   srcPitch  distance between the starts of two source rows, in bytes
+ *   dstPtr    first byte of the destination image; 2*width pixels are
+ *             written on each of 2*height rows
+ *   dstPitch  distance between the starts of two destination rows, in bytes
+ *   width     source width in pixels
+ *   height    source height in pixels
+ *
+ * Row and column counters are 32 bits wide: 16-bit counters can never reach
+ * a width or height above 0xFFFF, so the loops would not terminate.
+ */
+void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  const uint32 cols = width;
+  const uint32 rows = height;
+
+  for (uint32 row = 0; row < rows; row++)
+  {
+    const bool hasRowBelow = (row + 1 < rows);
+
+    // source: the current row and the one under it (the current row again
+    // when this is the bottom row, so that S and SE clamp to the edge)
+    const uint16 *cur = (const uint16*)(srcPtr + row*srcPitch);
+    const uint16 *below = hasRowBelow ? (const uint16*)(srcPtr + (row+1)*srcPitch) : cur;
+
+    // destination: every source row produces two output rows
+    uint16 *outUpper = (uint16*)(dstPtr + (row*2)*dstPitch);
+    uint16 *outLower = (uint16*)(dstPtr + (row*2+1)*dstPitch);
+
+    for (uint32 col = 0; col < cols; col++)
+    {
+      // column of the right-hand neighbour, clamped on the last column
+      const uint32 eastCol = (col + 1 < cols) ? col + 1 : col;
+
+      const uint16 p  = cur[col];
+      const uint16 e  = cur[eastCol];
+      const uint16 s  = below[col];
+      const uint16 se = below[eastCol];
+
+      // upper output row: P, mean(P, E)
+      outUpper[col*2]   = p;
+      outUpper[col*2+1] = Texture2x_16_Mean2(p, e);
+
+      // lower output row: mean(P, S), mean(P, E, S, SE)
+      outLower[col*2]   = Texture2x_16_Mean2(p, s);
+      outLower[col*2+1] = Texture2x_16_Mean4(p, e, s, se);
+    }
+  }
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * Sharp filters
+ * Hiroshi Morii <koolsmoky@users.sourceforge.net>
+ *
+ * SharpFilter_8888 sharpens an image of 32-bit pixels with four 8-bit
+ * channels each. Rows are tightly packed: srcwidth pixels, no padding.
+ *
+ *   src        source image, srcwidth * srcheight pixels (only read)
+ *   srcwidth   width in pixels
+ *   srcheight  height in pixels
+ *   dest       output image, same dimensions as src
+ *   filter     SHARP_FILTER_2 selects a centre weight of 12 and a shift of
+ *              2; any other value (SHARP_FILTER_1 included) selects a
+ *              centre weight of 16 and a shift of 3
+ *
+ * The outermost rows and columns are copied unchanged. For every other
+ * pixel each channel c is compared with n, the sum of that channel over the
+ * eight surrounding pixels:
+ *   8*c >  n : output = min(0xFF, (weight*c - n) >> shift)
+ *   8*c <= n : output = c
+ *
+ * Images narrower or shorter than 3 pixels have no interior pixels and are
+ * copied as a whole; an empty image leaves dest untouched. The general path
+ * below relies on this: it assumes a second row and a last column exist.
+ */
+void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
+{
+  const uint32 weight = (filter == SHARP_FILTER_2) ? 12 : 16;
+  const uint32 shift  = (filter == SHARP_FILTER_2) ? 2 : 3;
+  const size_t rowBytes = (size_t)srcwidth * sizeof(uint32);
+
+  if (srcwidth == 0 || srcheight == 0)
+    return;
+  if (srcwidth < 3 || srcheight < 3) {
+    memcpy(dest, src, rowBytes * srcheight);
+    return;
+  }
+
+  // three consecutive source rows: above, current, below
+  const uint32 *above = src;
+  const uint32 *cur = above + srcwidth;
+  const uint32 *below = cur + srcwidth;
+  uint32 *out = dest;
+
+  // first row: copied
+  memcpy(out, above, rowBytes);
+  out += srcwidth;
+
+  for (uint32 y = 1; y < srcheight - 1; y++) {
+    // first pixel of the row: copied
+    out[0] = cur[0];
+    for (uint32 x = 1; x < srcwidth - 1; x++) {
+      uint32 pixel = 0;
+      // shift-based channel access keeps the result independent of byte order
+      for (uint32 bit = 0; bit < 32; bit += 8) {
+        const uint32 c = (cur[x] >> bit) & 0xFF;
+        const uint32 n = ((above[x-1] >> bit) & 0xFF) + ((above[x] >> bit) & 0xFF) + ((above[x+1] >> bit) & 0xFF)
+                       + ((cur[x-1] >> bit) & 0xFF) + ((cur[x+1] >> bit) & 0xFF)
+                       + ((below[x-1] >> bit) & 0xFF) + ((below[x] >> bit) & 0xFF) + ((below[x+1] >> bit) & 0xFF);
+        uint32 v = c;
+        if (c * 8 > n) {
+          v = (c * weight - n) >> shift;
+          if (v > 0xFF) v = 0xFF;
+        }
+        pixel |= v << bit;
+      }
+      out[x] = pixel;
+    }
+    // last pixel of the row: copied
+    out[srcwidth-1] = cur[srcwidth-1];
+
+    above = cur;
+    cur = below;
+    below += srcwidth;
+    out += srcwidth;
+  }
+  // last row: copied
+  memcpy(out, cur, rowBytes);
+}
+
+#if !_16BPP_HACK
+/*
+ * SharpFilter_4444 sharpens an image of 16-bit pixels with four 4-bit
+ * channels each. Rows are tightly packed: srcwidth pixels, no padding.
+ *
+ *   src        source image, srcwidth * srcheight pixels (only read)
+ *   srcwidth   width in pixels
+ *   srcheight  height in pixels
+ *   dest       output image, same dimensions as src
+ *   filter     SHARP_FILTER_2 selects a centre weight of 12 and a shift of
+ *              2; any other value (SHARP_FILTER_1 included) selects a
+ *              centre weight of 16 and a shift of 3
+ *
+ * The outermost rows and columns are copied unchanged. For every other
+ * pixel each channel c is compared with n, the sum of that channel over the
+ * eight surrounding pixels:
+ *   8*c >  n : output = min(0xF, (weight*c - n) >> shift)
+ *   8*c <= n : output = c
+ *
+ * Images narrower or shorter than 3 pixels have no interior pixels and are
+ * copied as a whole; an empty image leaves dest untouched. The general path
+ * below relies on this: it assumes a second row and a last column exist.
+ */
+void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
+{
+  const uint32 weight = (filter == SHARP_FILTER_2) ? 12 : 16;
+  const uint32 shift  = (filter == SHARP_FILTER_2) ? 2 : 3;
+  const size_t rowBytes = (size_t)srcwidth * sizeof(uint16);
+
+  if (srcwidth == 0 || srcheight == 0)
+    return;
+  if (srcwidth < 3 || srcheight < 3) {
+    memcpy(dest, src, rowBytes * srcheight);
+    return;
+  }
+
+  // three consecutive source rows: above, current, below
+  const uint16 *above = src;
+  const uint16 *cur = above + srcwidth;
+  const uint16 *below = cur + srcwidth;
+  uint16 *out = dest;
+
+  // first row: copied
+  memcpy(out, above, rowBytes);
+  out += srcwidth;
+
+  for (uint32 y = 1; y < srcheight - 1; y++) {
+    // first pixel of the row: copied
+    out[0] = cur[0];
+    for (uint32 x = 1; x < srcwidth - 1; x++) {
+      uint32 pixel = 0;
+      /* Hiroshi Morii <koolsmoky@users.sourceforge.net>
+       * Read the entire 16bit pixel and then extract the A,R,G,B components.
+       */
+      for (uint32 bit = 0; bit < 16; bit += 4) {
+        const uint32 c = (cur[x] >> bit) & 0xF;
+        const uint32 n = ((above[x-1] >> bit) & 0xF) + ((above[x] >> bit) & 0xF) + ((above[x+1] >> bit) & 0xF)
+                       + ((cur[x-1] >> bit) & 0xF) + ((cur[x+1] >> bit) & 0xF)
+                       + ((below[x-1] >> bit) & 0xF) + ((below[x] >> bit) & 0xF) + ((below[x+1] >> bit) & 0xF);
+        uint32 v = c;
+        if (c * 8 > n) {
+          v = (c * weight - n) >> shift;
+          if (v > 0xF) v = 0xF;
+        }
+        pixel |= v << bit;
+      }
+      out[x] = (uint16)pixel;
+    }
+    // last pixel of the row: copied
+    out[srcwidth-1] = cur[srcwidth-1];
+
+    above = cur;
+    cur = below;
+    below += srcwidth;
+    out += srcwidth;
+  }
+  // last row: copied
+  memcpy(out, cur, rowBytes);
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * Smooth filters
+ * Hiroshi Morii <koolsmoky@users.sourceforge.net>
+ */
+void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
+{
+  /* Writes a smoothed copy of the srcwidth x srcheight 32bpp image 'src' into 'dest'. Boundary pixels and
+   * rows are copied unfiltered. 'filter' selects the weights; unknown values behave like SMOOTH_FILTER_1. */
+  uint32 mul1, mul2, mul3, shift4;
+  uint32 x,y,z;
+  uint32 *_src1, *_src2, *_src3, *_dest;
+  uint32 val[4];
+  uint32 t1,t2,t3,t4,t5,t6,t7,t8,t9;
+  /* degenerate sizes: leave before the unsigned "size - 1" bounds wrap or the row copies run past the image */
+  if (srcwidth == 0 || srcheight == 0) return;
+  if (srcheight < 3) { memcpy(dest, src, (size_t)srcwidth * srcheight * sizeof(uint32)); return; }
+  switch( filter ) {
+  case SMOOTH_FILTER_4: /* 3x3: corners x1, edges x2, centre x4, sum >> 4 */
+    mul1=1;
+    mul2=2;
+    mul3=4;
+    shift4=4;
+    break;
+  case SMOOTH_FILTER_3: /* 3x3: corners x1, edges x1, centre x8, sum >> 4 */
+    mul1=1;
+    mul2=1;
+    mul3=8;
+    shift4=4;
+    break;
+  case SMOOTH_FILTER_2: /* vertical: above/below x1, centre x2, sum >> 2 */
+    mul1=1;
+    mul2=1;
+    mul3=2;
+    shift4=2;
+    break;
+  case SMOOTH_FILTER_1: /* vertical: above/below x1, centre x6, sum >> 3 */
+  default:
+    mul1=1;
+    mul2=1;
+    mul3=6;
+    shift4=3;
+    break;
+  }
+
+  switch (filter) {
+  case SMOOTH_FILTER_3:
+  case SMOOTH_FILTER_4:
+    // setup rows: _src1/_src2/_src3 are the rows above, at and below the row being written
+    _src1 = src;
+    _src2 = _src1 + srcwidth;
+    _src3 = _src2 + srcwidth;
+    _dest = dest;
+    // copy the first row
+    memcpy(_dest, _src1, (srcwidth << 2));
+    _dest += srcwidth;
+    // filter 2nd row to 1 row before the last
+    for (y = 1; y < srcheight - 1; y++){
+      // copy the first pixel
+      _dest[0] = _src2[0];
+      // filter 2nd pixel to 1 pixel before last, one 8-bit channel (z) at a time
+      for (x = 1; x < srcwidth - 1; x++) {
+        for (z = 0; z < 4; z++ ) {
+          t1 = (_src1[x-1] >> (z << 3)) & 0xFF;
+          t2 = (_src1[x  ] >> (z << 3)) & 0xFF;
+          t3 = (_src1[x+1] >> (z << 3)) & 0xFF;
+          t4 = (_src2[x-1] >> (z << 3)) & 0xFF;
+          t5 = (_src2[x  ] >> (z << 3)) & 0xFF;
+          t6 = (_src2[x+1] >> (z << 3)) & 0xFF;
+          t7 = (_src3[x-1] >> (z << 3)) & 0xFF;
+          t8 = (_src3[x  ] >> (z << 3)) & 0xFF;
+          t9 = (_src3[x+1] >> (z << 3)) & 0xFF;
+          /* channels are extracted by shifting, so the result does not depend on host byte order; clamp to 0xFF */
+          val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4;
+          if (val[z] > 0xFF) val[z] = 0xFF;
+        }
+        _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
+      }
+      // copy the ending pixel of the current row
+      _dest[srcwidth-1] = _src2[srcwidth-1];
+      // next row
+      _src1 += srcwidth;
+      _src2 += srcwidth;
+      _src3 += srcwidth;
+      _dest += srcwidth;
+    }
+    // copy the last row
+    memcpy(_dest, _src2, (srcwidth << 2));
+    break;
+  case SMOOTH_FILTER_1:
+  case SMOOTH_FILTER_2:
+  default:
+    // setup rows: _src1/_src2/_src3 are the rows above, at and below the row being written
+    _src1 = src;
+    _src2 = _src1 + srcwidth;
+    _src3 = _src2 + srcwidth;
+    _dest = dest;
+    // copy the first row
+    memcpy(_dest, _src1, (srcwidth << 2));
+    _dest += srcwidth;
+    // filter 2nd row to 1 row before the last
+    for (y = 1; y < srcheight - 1; y++) {
+      // odd rows are blended with the rows above and below; even rows are copied as they are
+      if (y & 1) {
+        for( x = 0; x < srcwidth; x++) {
+          for( z = 0; z < 4; z++ ) {
+            t2 = (_src1[x] >> (z << 3)) & 0xFF;
+            t5 = (_src2[x] >> (z << 3)) & 0xFF;
+            t8 = (_src3[x] >> (z << 3)) & 0xFF;
+            /* the component value must not overflow 0xFF */
+            val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4;
+            if (val[z] > 0xFF) val[z] = 0xFF;
+          }
+          _dest[x] = val[0]|(val[1]<<8)|(val[2]<<16)|(val[3]<<24);
+        }
+      } else {
+         memcpy(_dest, _src2, (srcwidth << 2));
+      }
+      // next row
+      _src1 += srcwidth;
+      _src2 += srcwidth;
+      _src3 += srcwidth;
+      _dest += srcwidth;
+    }
+    // copy the last row
+    memcpy(_dest, _src2, (srcwidth << 2));
+    break;
+  }
+}
+
+#if !_16BPP_HACK
+void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
+{
+  /* Writes a smoothed copy of the srcwidth x srcheight 16bpp (4 bits per channel) image 'src' into 'dest'.
+   * Boundary pixels and rows are copied unfiltered. Unknown 'filter' values behave like SMOOTH_FILTER_1. */
+  uint16 mul1, mul2, mul3, shift4;
+  uint32 x,y,z;
+  uint16 *_src1, *_src2, *_src3, *_dest;
+  uint16 val[4];
+  uint16 t1,t2,t3,t4,t5,t6,t7,t8,t9;
+  /* degenerate sizes: leave before the unsigned "size - 1" bounds wrap or the row copies run past the image */
+  if (srcwidth == 0 || srcheight == 0) return;
+  if (srcheight < 3) { memcpy(dest, src, (size_t)srcwidth * srcheight * sizeof(uint16)); return; }
+  switch( filter ) {
+  case SMOOTH_FILTER_4: /* 3x3: corners x1, edges x2, centre x4, sum >> 4 */
+    mul1=1;
+    mul2=2;
+    mul3=4;
+    shift4=4;
+    break;
+  case SMOOTH_FILTER_3: /* 3x3: corners x1, edges x1, centre x8, sum >> 4 */
+    mul1=1;
+    mul2=1;
+    mul3=8;
+    shift4=4;
+    break;
+  case SMOOTH_FILTER_2: /* vertical: above/below x1, centre x2, sum >> 2 */
+    mul1=1;
+    mul2=1;
+    mul3=2;
+    shift4=2;
+    break;
+  case SMOOTH_FILTER_1: /* vertical: above/below x1, centre x6, sum >> 3 */
+  default:
+    mul1=1;
+    mul2=1;
+    mul3=6;
+    shift4=3;
+    break;
+  }
+
+  switch (filter) {
+  case SMOOTH_FILTER_3:
+  case SMOOTH_FILTER_4:
+    // setup rows: _src1/_src2/_src3 are the rows above, at and below the row being written
+    _src1 = src;
+    _src2 = _src1 + srcwidth;
+    _src3 = _src2 + srcwidth;
+    _dest = dest;
+    // copy the first row
+    memcpy(_dest, _src1, (srcwidth << 1));
+    _dest += srcwidth;
+    // filter 2nd row to 1 row before the last
+    for (y = 1; y < srcheight - 1; y++) {
+      // copy the first pixel
+      _dest[0] = _src2[0];
+      // filter 2nd pixel to 1 pixel before last
+      for (x = 1; x < srcwidth - 1; x++) {
+        for (z = 0; z < 4; z++ ) {
+          /* Read the entire 16bit pixel and then extract the 4-bit component selected by z. */
+          uint32 shift = z << 2;
+          t1 = (_src1[x-1] >> shift) & 0xF;
+          t2 = (_src1[x  ] >> shift) & 0xF;
+          t3 = (_src1[x+1] >> shift) & 0xF;
+          t4 = (_src2[x-1] >> shift) & 0xF;
+          t5 = (_src2[x  ] >> shift) & 0xF;
+          t6 = (_src2[x+1] >> shift) & 0xF;
+          t7 = (_src3[x-1] >> shift) & 0xF;
+          t8 = (_src3[x  ] >> shift) & 0xF;
+          t9 = (_src3[x+1] >> shift) & 0xF;
+          /* the component value must not overflow 0xF */
+          val[z] = ((t1+t3+t7+t9)*mul1+((t2+t4+t6+t8)*mul2)+(t5*mul3))>>shift4;
+          if (val[z] > 0xF) val[z] = 0xF;
+        }
+        _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12);
+      }
+      // copy the ending pixel of the current row
+      _dest[srcwidth-1] = _src2[srcwidth-1];
+      // next row
+      _src1 += srcwidth;
+      _src2 += srcwidth;
+      _src3 += srcwidth;
+      _dest += srcwidth;
+    }
+    // copy the last row
+    memcpy(_dest, _src2, (srcwidth << 1));
+    break;
+  case SMOOTH_FILTER_1:
+  case SMOOTH_FILTER_2:
+  default:
+    // setup rows: _src1/_src2/_src3 are the rows above, at and below the row being written
+    _src1 = src;
+    _src2 = _src1 + srcwidth;
+    _src3 = _src2 + srcwidth;
+    _dest = dest;
+    // copy the first row
+    memcpy(_dest, _src1, (srcwidth << 1));
+    _dest += srcwidth;
+    // 2nd row to 1 row before the last: odd rows are blended vertically, even rows are copied
+    for( y = 1; y < srcheight - 1; y++) {
+      if (y & 1) {
+        for( x = 0; x < srcwidth; x++) {
+          for( z = 0; z < 4; z++ ) {
+            /* Read the entire 16bit pixel and then extract the 4-bit component selected by z. */
+            uint32 shift = z << 2;
+            t2 = (_src1[x] >> shift) & 0xF;
+            t5 = (_src2[x] >> shift) & 0xF;
+            t8 = (_src3[x] >> shift) & 0xF;
+            /* the component value must not overflow 0xF */
+            val[z] = ((t2+t8)*mul2+(t5*mul3))>>shift4;
+            if (val[z] > 0xF) val[z] = 0xF;
+          }
+          _dest[x] = val[0]|(val[1]<<4)|(val[2]<<8)|(val[3]<<12);
+        }
+      } else {
+         memcpy(_dest, _src2, (srcwidth << 1));
+      }
+      // next row
+      _src1 += srcwidth;
+      _src2 += srcwidth;
+      _src3 += srcwidth;
+      _dest += srcwidth;
+    }
+    // copy the last row
+    memcpy(_dest, _src2, (srcwidth << 1));
+    break;
+  }
+}
+#endif /* !_16BPP_HACK */
+
+void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) {
+  /* Dispatcher for 32bpp textures. Runs the enhancer named by (filter & ENHANCEMENT_MASK);
+   * when that matches none of the enhancers below, runs the smooth or sharp filter named by
+   * (filter & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) instead. If nothing matches, the
+   * function returns without writing to dest. */
+  const uint32 enhancer = filter & ENHANCEMENT_MASK;
+  const uint32 kernel = filter & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK);
+
+  /* byte views of the buffers and the row pitches handed to the byte-oriented entry points */
+  uint8 *const in = (uint8*)src;
+  uint8 *const out = (uint8*)dest;
+  const uint32 inPitch = (srcwidth << 2);
+  const uint32 outPitch = (srcwidth << 3);
+
+  /* enhancers take priority over the plain filters */
+  if (enhancer == HQ4X_ENHANCEMENT) {
+    hq4x_8888(in, out, srcwidth, srcheight, srcwidth, (srcwidth << 4));
+  } else if (enhancer == HQ2X_ENHANCEMENT) {
+    hq2x_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancer == HQ2XS_ENHANCEMENT) {
+    hq2xS_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancer == LQ2X_ENHANCEMENT) {
+    lq2x_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancer == LQ2XS_ENHANCEMENT) {
+    lq2xS_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancer == X2SAI_ENHANCEMENT) {
+    Super2xSaI_8888(src, dest, srcwidth, srcheight, srcwidth);
+  } else if (enhancer == X2_ENHANCEMENT) {
+    Texture2x_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (kernel == SMOOTH_FILTER_1 || kernel == SMOOTH_FILTER_2 ||
+             kernel == SMOOTH_FILTER_3 || kernel == SMOOTH_FILTER_4) {
+    /* only the smooth-filter bits are forwarded */
+    SmoothFilter_8888(src, srcwidth, srcheight, dest, (filter & SMOOTH_FILTER_MASK));
+  } else if (kernel == SHARP_FILTER_1 || kernel == SHARP_FILTER_2) {
+    /* only the sharp-filter bits are forwarded */
+    SharpFilter_8888(src, srcwidth, srcheight, dest, (filter & SHARP_FILTER_MASK));
+  }
+}
diff --git a/Source/GlideHQ/TextureFilters.h b/Source/GlideHQ/TextureFilters.h
new file mode 100644
index 000000000..7830eac5f
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters.h
@@ -0,0 +1,81 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef __TEXTUREFILTERS_H__
+#define __TEXTUREFILTERS_H__
+/* Prototypes for the texture enhancers and filters, plus the filter_8888 dispatch helper. */
+/* 16bpp filters are somewhat buggy and output image is not clean.
+ * Since there's not much time, we'll just convert them to ARGB8888
+ * and use 32bpp filters until fixed.
+ * (1:enable hack, 0:disable hack) */
+#define _16BPP_HACK 1
+
+#include "TxInternal.h"
+
+/* enhancers (32bpp). filter_8888 calls the *_32 entry points with pitches of srcwidth<<2 (source) and srcwidth<<3 (destination). */
+void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+
+void hq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void hq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+void lq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void lq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+/* Super2xSaI: pitch is counted in pixels (the shared loop body advances srcPtr by pitch per source row) */
+void Super2xSaI_8888(uint32 *srcPtr, uint32 *destPtr, uint32 width, uint32 height, uint32 pitch);
+
+void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+/* filters: 'filter' selects the variant (SHARP_FILTER_x / SMOOTH_FILTER_x) */
+void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter);
+/* SmoothFilter_8888 writes a same-size image to dest; boundary pixels are copied unfiltered */
+void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter);
+
+/* helper: picks one of the enhancers or filters above from the bits of 'filter' */
+void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter);
+/* 16bpp / 8bpp variants: declared only while the 16bpp hack above is disabled */
+#if !_16BPP_HACK
+void hq4x_init(void);
+void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+void hq4x_565 (unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+
+void hq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void hq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+void lq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void lq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+void Super2xSaI_4444(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch);
+void Super2xSaI_1555(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch);
+void Super2xSaI_565 (uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch);
+void Super2xSaI_8   (uint8  *srcPtr, uint8  *destPtr, uint32 width, uint32 height, uint32 pitch);
+
+void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter);
+
+void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter);
+#endif /* !_16BPP_HACK */
+
+#endif /* __TEXTUREFILTERS_H__ */
diff --git a/Source/GlideHQ/TextureFilters_2xsai.cpp b/Source/GlideHQ/TextureFilters_2xsai.cpp
new file mode 100644
index 000000000..38226df28
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_2xsai.cpp
@@ -0,0 +1,155 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Based on Derek Liauw Kie Fa and Rice1964 Super2xSaI code */
+
+#include "TextureFilters.h"
+
+#define GET_RESULT(A, B, C, D) ((((A) != (C)) || ((A) != (D))) - (((B) != (C)) || ((B) != (D))))  /* 1, 0 or -1: (A differs from C or D) minus (B differs from C or D) */
+
+void Super2xSaI_8888(uint32 *srcPtr, uint32 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{ /* Super2xSaI for 32bpp pixels (four 8-bit channels): the shared loop body writes a 2x2 block to destPtr per source pixel. */
+#define SAI_INTERPOLATE_8888(A, B) ((((A) & 0xFEFEFEFE) >> 1) + (((B) & 0xFEFEFEFE) >> 1) + ((A) & (B) & 0x01010101))
+#define SAI_Q_INTERPOLATE_8888(A, B, C, D) ((((A) & 0xFCFCFCFC) >> 2) + (((B) & 0xFCFCFCFC) >> 2) + (((C) & 0xFCFCFCFC) >> 2) + (((D) & 0xFCFCFCFC) >> 2) \
+  + (((((A) & 0x03030303) + ((B) & 0x03030303) + ((C) & 0x03030303) + ((D) & 0x03030303)) >> 2) & 0x03030303))
+  /* per-channel averages of two / four packed pixels; bound to the generic names the loop body uses */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_8888
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_8888
+
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  uint32 color4, color5, color6;
+  uint32 color1, color2, color3;
+  uint32 colorA0, colorA1, colorA2, colorA3;
+  uint32 colorB0, colorB1, colorB2, colorB3;
+  uint32 colorS1, colorS2;
+  uint32 product1a, product1b, product2a, product2b;
+  /* shared loop body: it refers to the parameters, locals and macros above by name */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+#if !_16BPP_HACK
+void Super2xSaI_4444(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{ /* Super2xSaI for 16bpp pixels with four 4-bit channels: the shared loop body writes a 2x2 block to destPtr per source pixel. */
+#define SAI_INTERPOLATE_4444(A, B) ((((A) & 0xEEEE) >> 1) + (((B) & 0xEEEE) >> 1) + ((A) & (B) & 0x1111))
+#define SAI_Q_INTERPOLATE_4444(A, B, C, D) ((((A) & 0xCCCC) >> 2) + (((B) & 0xCCCC) >> 2) + (((C) & 0xCCCC) >> 2) + (((D) & 0xCCCC) >> 2) \
+  + (((((A) & 0x3333) + ((B) & 0x3333) + ((C) & 0x3333) + ((D) & 0x3333)) >> 2) & 0x3333))
+  /* per-channel averages of two / four packed pixels; bound to the generic names the loop body uses */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_4444
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_4444
+
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  uint16 color4, color5, color6;
+  uint16 color1, color2, color3;
+  uint16 colorA0, colorA1, colorA2, colorA3;
+  uint16 colorB0, colorB1, colorB2, colorB3;
+  uint16 colorS1, colorS2;
+  uint16 product1a, product1b, product2a, product2b;
+  /* shared loop body: it refers to the parameters, locals and macros above by name */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+void Super2xSaI_1555(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{ /* Super2xSaI for 16bpp 1-5-5-5 pixels: the shared loop body writes a 2x2 block to destPtr per source pixel. */
+#define SAI_INTERPOLATE_1555(A, B) ((((A) & 0x7BDE) >> 1) + (((B) & 0x7BDE) >> 1) + ((A) & (B) & 0x8421))
+#define SAI_Q_INTERPOLATE_1555(A, B, C, D) ((((A) & 0x739C) >> 2) + (((B) & 0x739C) >> 2) + (((C) & 0x739C) >> 2) + (((D) & 0x739C) >> 2) \
+  + (((((A) & 0x8C63) + ((B) & 0x8C63) + ((C) & 0x8C63) + ((D) & 0x8C63)) >> 2) & 0x8C63))
+  /* blends of two / four packed pixels (bit 15 survives only when set in every input); bound to the generic names */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_1555
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_1555
+
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  uint16 color4, color5, color6;
+  uint16 color1, color2, color3;
+  uint16 colorA0, colorA1, colorA2, colorA3;
+  uint16 colorB0, colorB1, colorB2, colorB3;
+  uint16 colorS1, colorS2;
+  uint16 product1a, product1b, product2a, product2b;
+  /* shared loop body: it refers to the parameters, locals and macros above by name */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+void Super2xSaI_565(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{ /* Super2xSaI for 16bpp 5-6-5 pixels: the shared loop body writes a 2x2 block to destPtr per source pixel. */
+#define SAI_INTERPOLATE_565(A, B) ((((A) & 0xF7DE) >> 1) + (((B) & 0xF7DE) >> 1) + ((A) & (B) & 0x0821))
+#define SAI_Q_INTERPOLATE_565(A, B, C, D) ((((A) & 0xE79C) >> 2) + (((B) & 0xE79C) >> 2) + (((C) & 0xE79C) >> 2) + (((D) & 0xE79C) >> 2) \
+  + (((((A) & 0x1863) + ((B) & 0x1863) + ((C) & 0x1863) + ((D) & 0x1863)) >> 2) & 0x1863))
+  /* per-channel averages of two / four packed pixels; bound to the generic names the loop body uses */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_565
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_565
+
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  uint16 color4, color5, color6;
+  uint16 color1, color2, color3;
+  uint16 colorA0, colorA1, colorA2, colorA3;
+  uint16 colorB0, colorB1, colorB2, colorB3;
+  uint16 colorS1, colorS2;
+  uint16 product1a, product1b, product2a, product2b;
+  /* shared loop body: it refers to the parameters, locals and macros above by name */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+void Super2xSaI_8(uint8 *srcPtr, uint8 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{ /* Super2xSaI for single-channel 8-bit pixels: the shared loop body writes a 2x2 block to destPtr per source pixel. */
+#define SAI_INTERPOLATE_8(A, B) ((((A) & 0xFE) >> 1) + (((B) & 0xFE) >> 1) + ((A) & (B) & 0x01))
+#define SAI_Q_INTERPOLATE_8(A, B, C, D) ((((A) & 0xFC) >> 2) + (((B) & 0xFC) >> 2) + (((C) & 0xFC) >> 2) + (((D) & 0xFC) >> 2) \
+  + (((((A) & 0x03) + ((B) & 0x03) + ((C) & 0x03) + ((D) & 0x03)) >> 2) & 0x03))
+  /* averages of two / four pixel values; bound to the generic names the loop body uses */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_8
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_8
+
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  uint8 color4, color5, color6;
+  uint8 color1, color2, color3;
+  uint8 colorA0, colorA1, colorA2, colorA3;
+  uint8 colorB0, colorB1, colorB2, colorB3;
+  uint8 colorS1, colorS2;
+  uint8 product1a, product1b, product2a, product2b;
+  /* shared loop body: it refers to the parameters, locals and macros above by name */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+#endif /* !_16BPP_HACK */
diff --git a/Source/GlideHQ/TextureFilters_2xsai.h b/Source/GlideHQ/TextureFilters_2xsai.h
new file mode 100644
index 000000000..f6696ae06
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_2xsai.h
@@ -0,0 +1,145 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Based on Derek Liauw Kie Fa and Rice1964 Super2xSaI code */
+
+  int row0, row1, row2, row3;
+  int col0, col1, col2, col3;
+  /* Shared Super2xSaI loop body: reads a 4x4 neighbourhood per source pixel and writes a 2x2 output block. */
+  uint32 x;   /* counters are as wide as width/height so the loops terminate for any size */
+  uint32 y;
+  /* source rows are 'pitch' pixels apart and output rows 2*pitch apart, matching the pointer advances at the end of each row */
+  for (y = 0; y < height; y++) {
+    if ((y > 0) && (y < height - 1)) {
+      row0 = (int)pitch;
+      row0 = -row0;
+      row1 = 0;
+      row2 = (int)pitch;
+      row3 = (int)(y == height - 2 ? pitch : pitch << 1);
+    } else {   /* first and last row: every vertical tap reads the current row */
+      row0 = 0;
+      row1 = 0;
+      row2 = 0;
+      row3 = 0;
+    }
+
+    for (x = 0; x < width; x++) {
+//--------------------------------------- B0 B1 B2 B3
+//                                         4  5  6 S2
+//                                         1  2  3 S1
+//                                        A0 A1 A2 A3
+      if ((x > 0) && (x < width - 1)) {
+        col0 = -1;
+        col1 = 0;
+        col2 = 1;
+        col3 = (x == width - 2 ? 1 : 2);
+      } else {   /* first and last column: every horizontal tap reads the current column */
+        col0 = 0;
+        col1 = 0;
+        col2 = 0;
+        col3 = 0;
+      }
+
+      colorB0 = *(srcPtr + col0 + row0);
+      colorB1 = *(srcPtr + col1 + row0);
+      colorB2 = *(srcPtr + col2 + row0);
+      colorB3 = *(srcPtr + col3 + row0);
+
+      color4 = *(srcPtr + col0 + row1);
+      color5 = *(srcPtr + col1 + row1);
+      color6 = *(srcPtr + col2 + row1);
+      colorS2 = *(srcPtr + col3 + row1);
+
+      color1 = *(srcPtr + col0 + row2);
+      color2 = *(srcPtr + col1 + row2);
+      color3 = *(srcPtr + col2 + row2);
+      colorS1 = *(srcPtr + col3 + row2);
+
+      colorA0 = *(srcPtr + col0 + row3);
+      colorA1 = *(srcPtr + col1 + row3);
+      colorA2 = *(srcPtr + col2 + row3);
+      colorA3 = *(srcPtr + col3 + row3);
+
+//--------------------------------------
+      if (color2 == color6 && color5 != color3)
+        product2b = product1b = color2;
+      else if (color5 == color3 && color2 != color6)
+        product2b = product1b = color5;
+      else if (color5 == color3 && color2 == color6) {
+        int r = 0;
+
+        r += GET_RESULT(color6, color5, color1, colorA1);
+        r += GET_RESULT(color6, color5, color4, colorB1);
+        r += GET_RESULT(color6, color5, colorA2, colorS1);
+        r += GET_RESULT(color6, color5, colorB2, colorS2);
+
+        if (r > 0)
+          product2b = product1b = color6;
+        else if (r < 0)
+          product2b = product1b = color5;
+        else
+          product2b = product1b = SAI_INTERPOLATE(color5, color6);
+
+      } else {
+
+        if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
+          product2b = SAI_Q_INTERPOLATE(color3, color3, color3, color2);
+        else if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
+          product2b = SAI_Q_INTERPOLATE(color2, color2, color2, color3);
+        else
+          product2b = SAI_INTERPOLATE(color2, color3);
+
+        if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
+          product1b = SAI_Q_INTERPOLATE(color6, color6, color6, color5);
+        else if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
+          product1b = SAI_Q_INTERPOLATE(color6, color5, color5, color5);
+        else
+          product1b = SAI_INTERPOLATE(color5, color6);
+      }
+
+      if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
+        product2a = SAI_INTERPOLATE(color2, color5);
+      else if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
+        product2a = SAI_INTERPOLATE(color2, color5);
+      else
+        product2a = color2;
+
+      if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
+        product1a = SAI_INTERPOLATE(color2, color5);
+      else if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
+        product1a = SAI_INTERPOLATE(color2, color5);
+      else
+        product1a = color5;
+
+      /* 2x2 output block; the lower output row sits 2*pitch pixels after the upper one */
+      destPtr[0] = product1a;
+      destPtr[1] = product1b;
+      destPtr[pitch << 1] = product2a;
+      destPtr[(pitch << 1) + 1] = product2b;
+
+      srcPtr++;
+      destPtr += 2;
+    }
+    srcPtr += (pitch-width);
+    destPtr += (((pitch-width)<<1)+(pitch<<1));
+  }
diff --git a/Source/GlideHQ/TextureFilters_hq2x.cpp b/Source/GlideHQ/TextureFilters_hq2x.cpp
new file mode 100644
index 000000000..33cb99530
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_hq2x.cpp
@@ -0,0 +1,1510 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+/* 2007 Mudlord - Added hq2xS lq2xS filters */
+
+#include "TextureFilters.h"
+
+/************************************************************************/
+/* hq2x filters                                                         */
+/************************************************************************/
+
+/***************************************************************************/
+/* Basic types */
+
+/***************************************************************************/
+/* interpolation */
+
+//static unsigned interp_bits_per_pixel;
+
+#if !_16BPP_HACK
+#define INTERP_16_MASK_1_3(v) ((v)&0x0F0F)
+#define INTERP_16_MASK_SHIFT_2_4(v) (((v)&0xF0F0)>>4)
+#define INTERP_16_MASK_SHIFTBACK_2_4(v) ((INTERP_16_MASK_1_3(v))<<4)
+
+/*
+ * Blends three 16-bit pixels with weights 5:2:1 (sum / 8), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_521(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (5*INTERP_16_MASK_1_3(p1) + 2*INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 8;
+  const int f24 = (5*INTERP_16_MASK_SHIFT_2_4(p1) + 2*INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 16-bit pixels with weights 3:3:2 (sum / 8), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_332(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (3*INTERP_16_MASK_1_3(p1) + 3*INTERP_16_MASK_1_3(p2) + 2*INTERP_16_MASK_1_3(p3)) / 8;
+  const int f24 = (3*INTERP_16_MASK_SHIFT_2_4(p1) + 3*INTERP_16_MASK_SHIFT_2_4(p2) + 2*INTERP_16_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 16-bit pixels with weights 6:1:1 (sum / 8), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_611(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (6*INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 8;
+  const int f24 = (6*INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 16-bit pixels with weights 7:1 (sum / 8), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_71(uint16 p1, uint16 p2)
+{
+  const int f13 = (7*INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2)) / 8;
+  const int f24 = (7*INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2)) / 8;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 16-bit pixels with weights 2:1:1 (sum / 4), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_211(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (2*INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 4;
+  const int f24 = (2*INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 4;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 16-bit pixels with weights 7:7:2 (sum / 16), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_772(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (7*(INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2)) + 2*INTERP_16_MASK_1_3(p3)) / 16;
+  const int f24 = (7*(INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2)) + 2*INTERP_16_MASK_SHIFT_2_4(p3)) / 16;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Averages two 16-bit pixels (weights 1:1, sum / 2), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_11(uint16 p1, uint16 p2)
+{
+  const int f13 = (INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2)) / 2;
+  const int f24 = (INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2)) / 2;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 16-bit pixels with weights 3:1 (sum / 4), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_31(uint16 p1, uint16 p2)
+{
+  const int f13 = (3*INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2)) / 4;
+  const int f24 = (3*INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2)) / 4;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 16-bit pixels with weights 14:1:1 (sum / 16), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_1411(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (14*INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 16;
+  const int f24 = (14*INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 16;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 16-bit pixels with weights 4:3:1 (sum / 8), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_431(uint16 p1, uint16 p2, uint16 p3)
+{
+  const int f13 = (4*INTERP_16_MASK_1_3(p1) + 3*INTERP_16_MASK_1_3(p2) + INTERP_16_MASK_1_3(p3)) / 8;
+  const int f24 = (4*INTERP_16_MASK_SHIFT_2_4(p1) + 3*INTERP_16_MASK_SHIFT_2_4(p2) + INTERP_16_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 16-bit pixels with weights 5:3 (sum / 8), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_53(uint16 p1, uint16 p2)
+{
+  const int f13 = (5*INTERP_16_MASK_1_3(p1) + 3*INTERP_16_MASK_1_3(p2)) / 8;
+  const int f24 = (5*INTERP_16_MASK_SHIFT_2_4(p1) + 3*INTERP_16_MASK_SHIFT_2_4(p2)) / 8;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 16-bit pixels with weights 15:1 (sum / 16), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_151(uint16 p1, uint16 p2)
+{
+  const int f13 = (15*INTERP_16_MASK_1_3(p1) + INTERP_16_MASK_1_3(p2)) / 16;
+  const int f24 = (15*INTERP_16_MASK_SHIFT_2_4(p1) + INTERP_16_MASK_SHIFT_2_4(p2)) / 16;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 16-bit pixels with weights 9:7 (sum / 16), truncating.
+ * The two 4-bit fields kept by 0x0F0F and the two kept by 0xF0F0 are
+ * accumulated in separate packed words, then masked and recombined.
+ */
+static uint16 hq2x_interp_16_97(uint16 p1, uint16 p2)
+{
+  const int f13 = (9*INTERP_16_MASK_1_3(p1) + 7*INTERP_16_MASK_1_3(p2)) / 16;
+  const int f24 = (9*INTERP_16_MASK_SHIFT_2_4(p1) + 7*INTERP_16_MASK_SHIFT_2_4(p2)) / 16;
+
+  return INTERP_16_MASK_1_3(f13) | INTERP_16_MASK_SHIFTBACK_2_4(f24);
+}
+#endif /* !_16BPP_HACK */
+
+#define INTERP_32_MASK_1_3(v) ((v)&0x00FF00FF)
+#define INTERP_32_MASK_SHIFT_2_4(v) (((v)&0xFF00FF00)>>8)
+#define INTERP_32_MASK_SHIFTBACK_2_4(v) (((INTERP_32_MASK_1_3(v))<<8))
+
+/*
+ * Blends three 32-bit pixels with weights 5:2:1 (sum / 8), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_521(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (5*INTERP_32_MASK_1_3(p1) + 2*INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 8;
+  const uint32 f24 = (5*INTERP_32_MASK_SHIFT_2_4(p1) + 2*INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 32-bit pixels with weights 3:3:2 (sum / 8), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_332(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (3*INTERP_32_MASK_1_3(p1) + 3*INTERP_32_MASK_1_3(p2) + 2*INTERP_32_MASK_1_3(p3)) / 8;
+  const uint32 f24 = (3*INTERP_32_MASK_SHIFT_2_4(p1) + 3*INTERP_32_MASK_SHIFT_2_4(p2) + 2*INTERP_32_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 32-bit pixels with weights 2:1:1 (sum / 4), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_211(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (2*INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 4;
+  const uint32 f24 = (2*INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 4;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 32-bit pixels with weights 6:1:1 (sum / 8), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_611(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (6*INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 8;
+  const uint32 f24 = (6*INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 32-bit pixels with weights 7:1 (sum / 8), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_71(uint32 p1, uint32 p2)
+{
+  const uint32 f13 = (7*INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2)) / 8;
+  const uint32 f24 = (7*INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2)) / 8;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 32-bit pixels with weights 7:7:2 (sum / 16), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_772(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (7*(INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2)) + 2*INTERP_32_MASK_1_3(p3)) / 16;
+  const uint32 f24 = (7*(INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2)) + 2*INTERP_32_MASK_SHIFT_2_4(p3)) / 16;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Averages two 32-bit pixels (weights 1:1, sum / 2), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_11(uint32 p1, uint32 p2)
+{
+  const uint32 f13 = (INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2)) / 2;
+  const uint32 f24 = (INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2)) / 2;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 32-bit pixels with weights 3:1 (sum / 4), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_31(uint32 p1, uint32 p2)
+{
+  const uint32 f13 = (3*INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2)) / 4;
+  const uint32 f24 = (3*INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2)) / 4;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 32-bit pixels with weights 14:1:1 (sum / 16), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_1411(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (14*INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 16;
+  const uint32 f24 = (14*INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 16;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends three 32-bit pixels with weights 4:3:1 (sum / 8), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_431(uint32 p1, uint32 p2, uint32 p3)
+{
+  const uint32 f13 = (4*INTERP_32_MASK_1_3(p1) + 3*INTERP_32_MASK_1_3(p2) + INTERP_32_MASK_1_3(p3)) / 8;
+  const uint32 f24 = (4*INTERP_32_MASK_SHIFT_2_4(p1) + 3*INTERP_32_MASK_SHIFT_2_4(p2) + INTERP_32_MASK_SHIFT_2_4(p3)) / 8;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 32-bit pixels with weights 5:3 (sum / 8), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_53(uint32 p1, uint32 p2)
+{
+  const uint32 f13 = (5*INTERP_32_MASK_1_3(p1) + 3*INTERP_32_MASK_1_3(p2)) / 8;
+  const uint32 f24 = (5*INTERP_32_MASK_SHIFT_2_4(p1) + 3*INTERP_32_MASK_SHIFT_2_4(p2)) / 8;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 32-bit pixels with weights 15:1 (sum / 16), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_151(uint32 p1, uint32 p2)
+{
+  const uint32 f13 = (15*INTERP_32_MASK_1_3(p1) + INTERP_32_MASK_1_3(p2)) / 16;
+  const uint32 f24 = (15*INTERP_32_MASK_SHIFT_2_4(p1) + INTERP_32_MASK_SHIFT_2_4(p2)) / 16;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/*
+ * Blends two 32-bit pixels with weights 9:7 (sum / 16), truncating.
+ * The two 8-bit fields kept by 0x00FF00FF and the two kept by 0xFF00FF00
+ * are accumulated in separate packed words, then masked and recombined.
+ */
+static uint32 hq2x_interp_32_97(uint32 p1, uint32 p2)
+{
+  const uint32 f13 = (9*INTERP_32_MASK_1_3(p1) + 7*INTERP_32_MASK_1_3(p2)) / 16;
+  const uint32 f24 = (9*INTERP_32_MASK_SHIFT_2_4(p1) + 7*INTERP_32_MASK_SHIFT_2_4(p2)) / 16;
+
+  return INTERP_32_MASK_1_3(f13) | INTERP_32_MASK_SHIFTBACK_2_4(f24);
+}
+
+/***************************************************************************/
+/* diff */
+
+#define INTERP_Y_LIMIT (0x30*4)
+#define INTERP_U_LIMIT (0x07*4)
+#define INTERP_V_LIMIT (0x06*8)
+
+#if !_16BPP_HACK
+/*
+ * Reports whether two 16-bit pixels differ by more than the INTERP_*_LIMIT
+ * thresholds.
+ *
+ * Returns 0 when the pixels are bit-identical.  Otherwise the signed
+ * differences of the three low 4-bit fields (bits 0-3, 4-7, 8-11, named
+ * b, g, r here) are combined into y = r+g+b, u = r-b and v = -r+2g-b, and
+ * 1 is returned as soon as any of them lies outside its +/- limit; 0 is
+ * returned if all are within range.  Bits 12-15 take part only in the
+ * initial equality test.
+ */
+static int hq2x_interp_16_diff(uint16 p1, uint16 p2)
+{
+  if (p1 == p2)
+    return 0;
+
+  /* g and r are shifted after subtracting, so a negative difference relies
+     on >> behaving as an arithmetic shift for signed int */
+  const int b = (int)((p1 & 0x000F) - (p2 & 0x000F));
+  const int g = (int)((p1 & 0x00F0) - (p2 & 0x00F0)) >> 4;
+  const int r = (int)((p1 & 0x0F00) - (p2 & 0x0F00)) >> 8;
+
+  const int y = r + g + b;
+  const int u = r - b;
+  const int v = 2*g - r - b;
+
+  const int beyond =
+       (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT)
+    || (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT)
+    || (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT);
+
+  return beyond ? 1 : 0;
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * Reports whether two 32-bit pixels differ by more than the INTERP_*_LIMIT
+ * thresholds.
+ *
+ * Returns 0 when the top five bits of each of the three low bytes match
+ * (mask 0xF8F8F8).  Otherwise the signed differences of the three low
+ * bytes (bits 0-7, 8-15, 16-23, named b, g, r here) are combined into
+ * y = r+g+b, u = r-b and v = -r+2g-b, and 1 is returned when any of them
+ * lies outside its +/- limit; 0 is returned if all are within range.
+ * The top byte (bits 24-31) is never examined.
+ */
+static int hq2x_interp_32_diff(uint32 p1, uint32 p2)
+{
+  if ((p1 & 0xF8F8F8) == (p2 & 0xF8F8F8))
+    return 0;
+
+  /* the subtraction is done on unsigned values and reinterpreted as int;
+     g and r then rely on >> being an arithmetic shift for negative ints */
+  const int b = (int)((p1 & 0xFF) - (p2 & 0xFF));
+  const int g = (int)((p1 & 0xFF00) - (p2 & 0xFF00)) >> 8;
+  const int r = (int)((p1 & 0xFF0000) - (p2 & 0xFF0000)) >> 16;
+
+  const int y = r + g + b;
+  const int u = r - b;
+  const int v = 2*g - r - b;
+
+  const int beyond =
+       (y < -INTERP_Y_LIMIT || y > INTERP_Y_LIMIT)
+    || (u < -INTERP_U_LIMIT || u > INTERP_U_LIMIT)
+    || (v < -INTERP_V_LIMIT || v > INTERP_V_LIMIT);
+
+  return beyond ? 1 : 0;
+}
+
+/*static void interp_set(unsigned bits_per_pixel)
+{
+   interp_bits_per_pixel = bits_per_pixel;
+}*/
+
+#if !_16BPP_HACK
+/*
+ * hq2x, 16-bit pixels: processes `count` source pixels of one row.
+ *
+ * For each pixel a 3x3 window c[0..8] is gathered: c[0..2] come from src0,
+ * c[3..5] from src1 and c[6..8] from src2, with c[4] the pixel being
+ * scaled.  At the first/last position of the row the missing left/right
+ * column is replaced by the centre column.  Bit k of `mask` is set when
+ * hq2x_interp_16_diff() reports the k-th neighbour (window order, centre
+ * skipped) as different from c[4].  The mask selects a case in
+ * TextureFilters_hq2x.h, which is written in terms of the macros defined
+ * below: P0/P1 map to dst0[0..1], P2/P3 to dst1[0..1], IC/Ixxx to the
+ * 16-bit blend helpers, and HQ2X_M* to extra neighbour comparisons.
+ * Each iteration advances the source pointers by one pixel and the
+ * destination pointers by two.
+ */
+static void hq2x_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;
+    uint16 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      unsigned k;
+
+      for (k = 0; k < 8; ++k) {
+        if (hq2x_interp_16_diff(c[nbr[k]], c[4]))
+          mask |= 1 << k;
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR hq2x_interp_16_diff(c[1], c[5])
+#define HQ2X_MDR hq2x_interp_16_diff(c[5], c[7])
+#define HQ2X_MDL hq2x_interp_16_diff(c[7], c[3])
+#define HQ2X_MUL hq2x_interp_16_diff(c[3], c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/*
+ * hq2xS, 16-bit pixels: processes `count` source pixels of one row.
+ *
+ * The 3x3 window c[0..8] is gathered as in the plain hq2x row function
+ * (c[0..2] from src0, c[3..5] from src1, c[6..8] from src2, edges clamped
+ * to the centre column).  Instead of a fixed colour-distance test, each
+ * window pixel gets a brightness value 3*r + 3*g + 2*b; the threshold is
+ * 7/16 of the spread (max - min) of those nine values.  If the threshold
+ * exceeds 7, bit k of `mask` is set for every neighbour whose brightness
+ * differs from the centre's by more than the threshold; otherwise the
+ * mask stays 0.  The mask selects a case in TextureFilters_hq2x.h via the
+ * macros below; the four HQ2X_M* conditions are constant false here.
+ *
+ * Channel extraction depends on interp_bits_per_pixel: 16 selects field
+ * masks 0x1F / 0x7E0 / 0xF800, anything else 0x1F / 0x3E0 / 0x7C00.
+ * NOTE(review): the only declaration of interp_bits_per_pixel in this
+ * part of the file is commented out, and the 16-bit blend helpers used
+ * below split pixels into 4-bit fields (0x0F0F / 0xF0F0) -- confirm which
+ * 16-bit layout callers actually pass.
+ */
+static void hq2xS_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;
+    uint16 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      int luma[9];
+      int hi = 0, lo = 999999;
+      int threshold;
+      int j;
+
+      for (j = 0; j < 9; ++j) {
+        int r, g, b, bright;
+
+        if (interp_bits_per_pixel == 16) {
+          b = (int)((c[j] & 0x1F)) << 3;
+          g = (int)((c[j] & 0x7E0)) >> 3;
+          r = (int)((c[j] & 0xF800)) >> 8;
+        } else {
+          b = (int)((c[j] & 0x1F)) << 3;
+          g = (int)((c[j] & 0x3E0)) >> 2;
+          r = (int)((c[j] & 0x7C00)) >> 7;
+        }
+
+        bright = 3*r + 3*g + 2*b;
+        if (bright > hi) hi = bright;
+        if (bright < lo) lo = bright;
+        luma[j] = bright;
+      }
+
+      /* edge threshold: 7/16 of the brightness spread inside the window */
+      threshold = ((hi - lo) * 7) >> 4;
+      if (threshold > 7) {
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+        const int center = luma[4];
+        unsigned k;
+
+        for (k = 0; k < 8; ++k) {
+          if (ABS(luma[nbr[k]] - center) > threshold)
+            mask |= 1 << k;
+        }
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * hq2x, 32-bit pixels: processes `count` source pixels of one row.
+ *
+ * For each pixel a 3x3 window c[0..8] is gathered: c[0..2] come from src0,
+ * c[3..5] from src1 and c[6..8] from src2, with c[4] the pixel being
+ * scaled.  At the first/last position of the row the missing left/right
+ * column is replaced by the centre column.  Bit k of `mask` is set when
+ * hq2x_interp_32_diff() reports the k-th neighbour (window order, centre
+ * skipped) as different from c[4].  The mask selects a case in
+ * TextureFilters_hq2x.h, which is written in terms of the macros defined
+ * below: P0/P1 map to dst0[0..1], P2/P3 to dst1[0..1], IC/Ixxx to the
+ * 32-bit blend helpers, and HQ2X_M* to extra neighbour comparisons.
+ * Each iteration advances the source pointers by one pixel and the
+ * destination pointers by two.
+ */
+static void hq2x_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;
+    uint32 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      unsigned k;
+
+      for (k = 0; k < 8; ++k) {
+        if (hq2x_interp_32_diff(c[nbr[k]], c[4]))
+          mask |= 1 << k;
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR hq2x_interp_32_diff(c[1], c[5])
+#define HQ2X_MDR hq2x_interp_32_diff(c[5], c[7])
+#define HQ2X_MDL hq2x_interp_32_diff(c[7], c[3])
+#define HQ2X_MUL hq2x_interp_32_diff(c[3], c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/*
+ * hq2xS, 32-bit pixels: processes `count` source pixels of one row.
+ *
+ * The 3x3 window c[0..8] is gathered as in the plain hq2x row function
+ * (c[0..2] from src0, c[3..5] from src1, c[6..8] from src2, edges clamped
+ * to the centre column).  Each window pixel gets a brightness value
+ * 3*r + 3*g + 2*b computed from the top five bits of its three low bytes
+ * (b = bits 3-7, g = bits 11-15, r = bits 19-23); the threshold is 7/16
+ * of the spread (max - min) of those nine values.  If the threshold
+ * exceeds 7, bit k of `mask` is set for every neighbour whose brightness
+ * differs from the centre's by more than the threshold; otherwise the
+ * mask stays 0.  The mask selects a case in TextureFilters_hq2x.h via the
+ * macros below; the four HQ2X_M* conditions are constant false here.
+ * Each iteration advances the source pointers by one pixel and the
+ * destination pointers by two.
+ */
+static void hq2xS_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;
+    uint32 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      int luma[9];
+      int hi = 0, lo = 999999;
+      int threshold;
+      int j;
+
+      for (j = 0; j < 9; ++j) {
+        const int b = (int)((c[j] & 0xF8));
+        const int g = (int)((c[j] & 0xF800)) >> 8;
+        const int r = (int)((c[j] & 0xF80000)) >> 16;
+        const int bright = 3*r + 3*g + 2*b;
+
+        if (bright > hi) hi = bright;
+        if (bright < lo) lo = bright;
+        luma[j] = bright;
+      }
+
+      /* edge threshold: 7/16 of the brightness spread inside the window */
+      threshold = ((hi - lo) * 7) >> 4;
+      if (threshold > 7) {
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+        const int center = luma[4];
+        unsigned k;
+
+        for (k = 0; k < 8; ++k) {
+          if (ABS(luma[nbr[k]] - center) > threshold)
+            mask |= 1 << k;
+        }
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/***************************************************************************/
+/* LQ2x C implementation */
+
+/*
+* This effect is derived from the hq2x effect made by Maxim Stepin
+*/
+
+#if !_16BPP_HACK
+/*
+ * lq2x, 16-bit pixels: processes `count` source pixels of one row.
+ *
+ * For each pixel a 3x3 window c[0..8] is gathered: c[0..2] come from src0,
+ * c[3..5] from src1 and c[6..8] from src2, with c[4] the pixel being
+ * scaled.  At the first/last position of the row the missing left/right
+ * column is replaced by the centre column.  Bit k of `mask` is set when
+ * the k-th neighbour (window order, centre skipped) is not bit-identical
+ * to c[4].  The mask selects a case in TextureFilters_lq2x.h, which is
+ * written in terms of the macros defined below: P0/P1 map to dst0[0..1],
+ * P2/P3 to dst1[0..1], IC/Ixxx to the 16-bit blend helpers, and HQ2X_M*
+ * to exact inequality tests between edge-adjacent neighbours.
+ * Each iteration advances the source pointers by one pixel and the
+ * destination pointers by two.
+ */
+static void lq2x_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;
+    uint16 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      unsigned k;
+
+      for (k = 0; k < 8; ++k) {
+        if (c[nbr[k]] != c[4])
+          mask |= 1 << k;
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR (c[1] != c[5])
+#define HQ2X_MDR (c[5] != c[7])
+#define HQ2X_MDL (c[7] != c[3])
+#define HQ2X_MUL (c[3] != c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/*
+ * lq2xS, 16-bit pixels: processes `count` source pixels of one row.
+ *
+ * For each pixel a 3x3 window c[0..8] is gathered (c[0..2] from src0,
+ * c[3..5] from src1, c[6..8] from src2; c[4] is the pixel being scaled),
+ * with the missing left/right column replaced by the centre column at the
+ * ends of the row.  Each window pixel gets a brightness value
+ * 3*r + 3*g + 2*b; the threshold is 7/16 of the spread (max - min) of
+ * those nine values.  If the threshold exceeds 7, bit k of `mask` is set
+ * for every neighbour whose brightness differs from the centre's by more
+ * than the threshold; otherwise the mask stays 0.  The mask selects a
+ * case in TextureFilters_lq2x.h via the macros below; the four HQ2X_M*
+ * conditions are constant false here.
+ *
+ * `mask` is now cleared for every pixel.  Previously it was never
+ * initialised in this function, so the bits OR-ed into it and the
+ * switch on it started from an indeterminate value.
+ *
+ * NOTE(review): the channel masks below (0xF8 / 0xF800 / 0xF80000) are
+ * the ones the 32-bit variant uses.  Applied to a uint16 pixel the
+ * 0xF80000 term is always zero, so `r` never contributes.  Left unchanged
+ * until the intended 16-bit pixel layout is confirmed.
+ */
+static void lq2xS_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;  /* must start empty: bits are only ever OR-ed in */
+    uint16 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      int luma[9];
+      int hi = 0, lo = 999999;
+      int threshold;
+      int j;
+
+      for (j = 0; j < 9; ++j) {
+        /* see NOTE(review) above: r is always 0 for 16-bit input */
+        const int b = (int)((c[j] & 0xF8));
+        const int g = (int)((c[j] & 0xF800)) >> 8;
+        const int r = (int)((c[j] & 0xF80000)) >> 16;
+        const int bright = 3*r + 3*g + 2*b;
+
+        if (bright > hi) hi = bright;
+        if (bright < lo) lo = bright;
+        luma[j] = bright;
+      }
+
+      /* edge threshold: 7/16 of the brightness spread inside the window */
+      threshold = ((hi - lo) * 7) >> 4;
+      if (threshold > 7) {
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+        const int center = luma[4];
+        unsigned k;
+
+        for (k = 0; k < 8; ++k) {
+          if (ABS(luma[nbr[k]] - center) > threshold)
+            mask |= 1 << k;
+        }
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * lq2x, 32-bit pixels: processes `count` source pixels of one row.
+ *
+ * For each pixel a 3x3 window c[0..8] is gathered: c[0..2] come from src0,
+ * c[3..5] from src1 and c[6..8] from src2, with c[4] the pixel being
+ * scaled.  At the first/last position of the row the missing left/right
+ * column is replaced by the centre column.  Bit k of `mask` is set when
+ * the k-th neighbour (window order, centre skipped) is not bit-identical
+ * to c[4].  The mask selects a case in TextureFilters_lq2x.h, which is
+ * written in terms of the macros defined below: P0/P1 map to dst0[0..1],
+ * P2/P3 to dst1[0..1], IC/Ixxx to the 32-bit blend helpers, and HQ2X_M*
+ * to exact inequality tests between edge-adjacent neighbours.
+ * Each iteration advances the source pointers by one pixel and the
+ * destination pointers by two.
+ */
+static void lq2x_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  unsigned i;
+
+  for (i = 0; i < count; ++i) {
+    unsigned char mask = 0;
+    uint32 c[9];
+
+    /* centre column of the window */
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* left column; clamped to the centre column on the first pixel */
+    c[0] = (i > 0) ? src0[-1] : c[1];
+    c[3] = (i > 0) ? src1[-1] : c[4];
+    c[6] = (i > 0) ? src2[-1] : c[7];
+
+    /* right column; clamped to the centre column on the last pixel */
+    c[2] = (i < count-1) ? src0[1] : c[1];
+    c[5] = (i < count-1) ? src1[1] : c[4];
+    c[8] = (i < count-1) ? src2[1] : c[7];
+
+    {
+      /* window index of the neighbour that drives each mask bit */
+      static const unsigned char nbr[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+      unsigned k;
+
+      for (k = 0; k < 8; ++k) {
+        if (c[nbr[k]] != c[4])
+          mask |= 1 << k;
+      }
+    }
+
+    /* vocabulary consumed by the included case table */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR (c[1] != c[5])
+#define HQ2X_MDR (c[5] != c[7])
+#define HQ2X_MDL (c[7] != c[3])
+#define HQ2X_MUL (c[3] != c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* one source pixel consumed, a 2x2 destination cell produced */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/*
+ * lq2xS for one row of 32-bit pixels.  A neighbour only counts as different
+ * from the centre pixel when its brightness is further away than a threshold
+ * derived from the brightness range of the 3x3 window.
+ * dst0/dst1: upper/lower output rows, two pixels written per source pixel.
+ * src0/src1/src2: rows above/at/below the scaled row.  count: pixels per row.
+ */
+static void lq2xS_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  unsigned i;
+  for(i=0;i<count;++i) {
+    unsigned char mask = 0; /* bits are only OR-ed in below, so it must start clear */
+    uint32 c[9];
+
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+    if (i>0) {
+      c[0] = src0[-1];
+      c[3] = src1[-1];
+      c[6] = src2[-1];
+    } else {
+      c[0] = c[1];
+      c[3] = c[4];
+      c[6] = c[7];
+    }
+
+    if (i<count-1) {
+      c[2] = src0[1];
+      c[5] = src1[1];
+      c[8] = src2[1];
+    } else {
+      c[2] = c[1];
+      c[5] = c[4];
+      c[8] = c[7];
+    }
+
+    // hq2xS dynamic edge detection:
+    // simply comparing the center color against its surroundings will give bad results in many cases,
+    // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
+    int brightArray[9];
+    int maxBright = 0, minBright = 999999;
+    for(int j = 0 ; j < 9 ; j++) {
+      const int b = (int)((c[j] & 0xF8));
+      const int g = (int)((c[j] & 0xF800)) >> 8;
+      const int r = (int)((c[j] & 0xF80000)) >> 16;
+      const int bright = r+r+r + g+g+g + b+b; /* 3r + 3g + 2b */
+      if(bright > maxBright) maxBright = bright;
+      if(bright < minBright) minBright = bright;
+      brightArray[j] = bright;
+    }
+    int diffBright = ((maxBright - minBright) * 7) >> 4; /* 7/16 of the window's brightness range */
+    if(diffBright > 7) {
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+      const int centerBright = brightArray[4];
+      if(ABS(brightArray[0] - centerBright) > diffBright)
+        mask |= 1 << 0;
+      if(ABS(brightArray[1] - centerBright) > diffBright)
+        mask |= 1 << 1;
+      if(ABS(brightArray[2] - centerBright) > diffBright)
+        mask |= 1 << 2;
+      if(ABS(brightArray[3] - centerBright) > diffBright)
+        mask |= 1 << 3;
+      if(ABS(brightArray[5] - centerBright) > diffBright)
+        mask |= 1 << 4;
+      if(ABS(brightArray[6] - centerBright) > diffBright)
+        mask |= 1 << 5;
+      if(ABS(brightArray[7] - centerBright) > diffBright)
+        mask |= 1 << 6;
+      if(ABS(brightArray[8] - centerBright) > diffBright)
+        mask |= 1 << 7;
+#undef ABS
+    }
+
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false /* the four corner tests are switched off in this variant */
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+    src0 += 1;
+    src1 += 1;
+    src2 += 1;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+#if !_16BPP_HACK
+/* Scales a width x height image of 16-bit pixels to twice its size with hq2x.
+ * srcPitch and dstPitch are row strides in bytes; two output rows are written
+ * for every source row. */
+void hq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint16 *dst0 = (uint16 *)dstPtr;
+  uint16 *dst1 = dst0 + (dstPitch >> 1);
+
+  uint16 *src0 = (uint16 *)srcPtr;
+  uint16 *src1 = src0 + (srcPitch >> 1);
+  uint16 *src2 = src1 + (srcPitch >> 1);
+  int count = height - 2; /* rows that have both a row above and a row below */
+
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  hq2x_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  while(count>0) {
+    dst0 += dstPitch; /* dstPitch 16-bit elements = two output rows */
+    dst1 += dstPitch;
+    hq2x_16_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+    --count;
+  }
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  hq2x_16_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+
+
+/* hq2xS variant of the 2x scaler for 16-bit pixels.  srcPitch and dstPitch are
+ * row strides in bytes; the unnamed third argument (delta buffer) is unused. */
+void hq2xS_16(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
+              u8 *dstPtr, u32 dstPitch, int width, int height)
+{
+  u16 *dst0 = (u16 *)dstPtr;
+  u16 *dst1 = dst0 + (dstPitch >> 1);
+  u16 *src0 = (u16 *)srcPtr;
+  u16 *src1 = src0 + (srcPitch >> 1);
+  u16 *src2 = src1 + (srcPitch >> 1);
+  int count = height - 2; /* rows that have both a row above and a row below */
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  hq2xS_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return; /* count is -1 here; the loop must not be entered */
+  while(count>0) {
+    dst0 += dstPitch; /* dstPitch 16-bit elements = two output rows */
+    dst1 += dstPitch;
+    hq2xS_16_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+    --count;
+  }
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  hq2xS_16_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+#endif /* !_16BPP_HACK */
+
+/* Scales a width x height image of 32-bit pixels to twice its size with hq2x.
+ * srcPitch and dstPitch are row strides in bytes; two output rows are written
+ * for every source row. */
+void hq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+  int count = height - 2; /* rows that have both a row above and a row below */
+
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  hq2x_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  while(count>0) {
+    dst0 += dstPitch >> 1; /* dstPitch/2 32-bit elements = two output rows */
+    dst1 += dstPitch >> 1;
+    hq2x_32_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+    --count;
+  }
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  hq2x_32_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+
+/* Scales a width x height image of 32-bit pixels to twice its size with
+ * hq2xS.  srcPitch and dstPitch are row strides in bytes. */
+void hq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+  int count = height - 2; /* rows that have both a row above and a row below */
+  if( width <= 0 || height <= 0 ) return;
+  hq2xS_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width); /* top row; a one-row image has no src1 */
+  if( height == 1 ) return; /* count is -1 here; the loop must not be entered */
+  while(count>0) {
+    dst0 += dstPitch >> 1; /* dstPitch/2 32-bit elements = two output rows */
+    dst1 += dstPitch >> 1;
+    hq2xS_32_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+    --count;
+  }
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  hq2xS_32_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+
+#if !_16BPP_HACK
+/* Scales a width x height image of 16-bit pixels to twice its size with lq2x.
+ * srcPitch and dstPitch are row strides in bytes; two output rows are written
+ * for every source row. */
+void lq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint16 *dst0 = (uint16 *)dstPtr;
+  uint16 *dst1 = dst0 + (dstPitch >> 1);
+
+  uint16 *src0 = (uint16 *)srcPtr;
+  uint16 *src1 = src0 + (srcPitch >> 1);
+  uint16 *src2 = src1 + (srcPitch >> 1);
+  int count = height - 2; /* rows that have both a row above and a row below */
+
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  lq2x_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  while(count>0) {
+    dst0 += dstPitch; /* dstPitch 16-bit elements = two output rows */
+    dst1 += dstPitch;
+    lq2x_16_def(dst0, dst1, src0, src1, src2, width); /* same lq2x kernel as the edge rows */
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+    --count;
+  }
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  lq2x_16_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+
+/* Scales a width x height image of 16-bit pixels to twice its size with
+ * lq2xS.  srcPitch and dstPitch are row strides in bytes; two output rows are
+ * written for every source row. */
+void lq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint16 *dst0 = (uint16 *)dstPtr;
+  uint16 *dst1 = dst0 + (dstPitch >> 1);
+
+  uint16 *src0 = (uint16 *)srcPtr;
+  uint16 *src1 = src0 + (srcPitch >> 1);
+  uint16 *src2 = src1 + (srcPitch >> 1);
+  int count = height - 2; /* rows that have both a row above and a row below */
+
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  lq2xS_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  while(count>0) {
+    dst0 += dstPitch; /* dstPitch 16-bit elements = two output rows */
+    dst1 += dstPitch;
+    lq2xS_16_def(dst0, dst1, src0, src1, src2, width); /* same lq2xS kernel as the edge rows */
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+    --count;
+  }
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  lq2xS_16_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+#endif /* !_16BPP_HACK */
+
+/* Scales a width x height image of 32-bit pixels to twice its size with lq2x.
+ * srcPitch and dstPitch are row strides in bytes; two output rows are written
+ * for every source row. */
+void lq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+  int count = height - 2; /* rows that have both a row above and a row below */
+
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  lq2x_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  while(count>0) {
+    dst0 += dstPitch >> 1; /* dstPitch/2 32-bit elements = two output rows */
+    dst1 += dstPitch >> 1;
+    lq2x_32_def(dst0, dst1, src0, src1, src2, width); /* same lq2x kernel as the edge rows */
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+    --count;
+  }
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  lq2x_32_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+
+/* Scales a width x height image of 32-bit pixels to twice its size with
+ * lq2xS.  srcPitch and dstPitch are row strides in bytes; two output rows are
+ * written for every source row. */
+void lq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+  int count = height - 2; /* rows that have both a row above and a row below */
+
+  if( width <= 0 || height <= 0 ) return;
+  /* top row: a one-row image has no second row, so src1 is not read then */
+  lq2xS_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  while(count>0) {
+    dst0 += dstPitch >> 1; /* dstPitch/2 32-bit elements = two output rows */
+    dst1 += dstPitch >> 1;
+    lq2xS_32_def(dst0, dst1, src0, src1, src2, width); /* same lq2xS kernel as the edge rows */
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+    --count;
+  }
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  lq2xS_32_def(dst0, dst1, src0, src1, src1, width); /* bottom row: nothing below */
+}
+
+/************************************************************************/
+/* hq3x filters                                                         */
+/************************************************************************/
+
+/************************************************************************/
+/* scale2x filters                                                      */
+/************************************************************************/
+
+/************************************************************************/
+/* scale3x filters                                                      */
+/************************************************************************/
+
diff --git a/Source/GlideHQ/TextureFilters_hq2x.h b/Source/GlideHQ/TextureFilters_hq2x.h
new file mode 100644
index 000000000..7946323b8
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_hq2x.h
@@ -0,0 +1,1847 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+case 0 : 
+case 1 : 
+case 4 : 
+case 5 : 
+case 32 : 
+case 33 : 
+case 36 : 
+case 37 : 
+case 128 : 
+case 129 : 
+case 132 : 
+case 133 : 
+case 160 : 
+case 161 : 
+case 164 : 
+case 165 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 2 : 
+case 34 : 
+case 130 : 
+case 162 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 3 : 
+case 35 : 
+case 131 : 
+case 163 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 6 : 
+case 38 : 
+case 134 : 
+case 166 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 7 : 
+case 39 : 
+case 135 : 
+case 167 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 8 : 
+case 12 : 
+case 136 : 
+case 140 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+} break;
+case 9 : 
+case 13 : 
+case 137 : 
+case 141 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+} break;
+case 10 : 
+case 138 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 11 : 
+case 139 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 14 : 
+case 142 : 
+{
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+    P1 = I31(4, 5);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I521(4, 1, 5);
+  }
+} break;
+case 15 : 
+case 143 : 
+{
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P1 = I31(4, 5);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I521(4, 1, 5);
+  }
+} break;
+case 16 : 
+case 17 : 
+case 48 : 
+case 49 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+} break;
+case 18 : 
+case 50 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 19 : 
+case 51 : 
+{
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P0 = I31(4, 3);
+    P1 = I31(4, 2);
+  } else {
+    P0 = I521(4, 1, 3);
+    P1 = I332(1, 5, 4);
+  }
+} break;
+case 20 : 
+case 21 : 
+case 52 : 
+case 53 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+} break;
+case 22 : 
+case 54 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 23 : 
+case 55 : 
+{
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P0 = I31(4, 3);
+    P1 = IC(4);
+  } else {
+    P0 = I521(4, 1, 3);
+    P1 = I332(1, 5, 4);
+  }
+} break;
+case 24 : 
+case 66 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 25 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 26 : 
+case 31 : 
+case 95 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 27 : 
+case 75 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 28 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 29 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 30 : 
+case 86 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 40 : 
+case 44 : 
+case 168 : 
+case 172 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 41 : 
+case 45 : 
+case 169 : 
+case 173 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 42 : 
+case 170 : 
+{
+  P1 = I31(4, 2);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+    P2 = I31(4, 7);
+  } else {
+    P0 = I332(1, 3, 4);
+    P2 = I521(4, 3, 7);
+  }
+} break;
+case 43 : 
+case 171 : 
+{
+  P1 = I31(4, 2);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P2 = I31(4, 7);
+  } else {
+    P0 = I332(1, 3, 4);
+    P2 = I521(4, 3, 7);
+  }
+} break;
+case 46 : 
+case 174 : 
+{
+  P1 = I31(4, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 47 : 
+case 175 : 
+{
+  P1 = I31(4, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 56 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 57 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 58 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 59 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 60 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 61 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 62 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 63 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 64 : 
+case 65 : 
+case 68 : 
+case 69 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 67 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 70 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 71 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 72 : 
+case 76 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 73 : 
+case 77 : 
+{
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P0 = I31(4, 1);
+    P2 = I31(4, 6);
+  } else {
+    P0 = I521(4, 3, 1);
+    P2 = I332(3, 7, 4);
+  }
+} break;
+case 74 : 
+case 107 : 
+case 123 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 78 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 79 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 80 : 
+case 81 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 82 : 
+case 214 : 
+case 222 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 83 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 84 : 
+case 85 : 
+{
+  P0 = I211(4, 1, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P1 = I31(4, 1);
+    P3 = I31(4, 8);
+  } else {
+    P1 = I521(4, 5, 1);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 87 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 88 : 
+case 248 : 
+case 250 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 89 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 90 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 91 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 92 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 93 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 94 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 96 : 
+case 97 : 
+case 100 : 
+case 101 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 98 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 99 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 102 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 103 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 104 : 
+case 108 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 105 : 
+case 109 : 
+{
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P0 = I31(4, 1);
+    P2 = IC(4);
+  } else {
+    P0 = I521(4, 3, 1);
+    P2 = I332(3, 7, 4);
+  }
+} break;
+case 106 : 
+case 120 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 110 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 111 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 112 : 
+case 113 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  if (HQ2X_MDR) {
+    P2 = I31(4, 3);
+    P3 = I31(4, 8);
+  } else {
+    P2 = I521(4, 7, 3);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 114 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 115 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 116 : 
+case 117 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 118 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 119 : 
+{
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P0 = I31(4, 3);
+    P1 = IC(4);
+  } else {
+    P0 = I521(4, 1, 3);
+    P1 = I332(1, 5, 4);
+  }
+} break;
+case 121 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 122 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 124 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 125 : 
+{
+  P1 = I31(4, 1);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P0 = I31(4, 1);
+    P2 = IC(4);
+  } else {
+    P0 = I521(4, 3, 1);
+    P2 = I332(3, 7, 4);
+  }
+} break;
+case 126 : 
+{
+  P0 = I31(4, 0);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 127 : 
+{
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 144 : 
+case 145 : 
+case 176 : 
+case 177 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+} break;
+case 146 : 
+case 178 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+    P3 = I31(4, 7);
+  } else {
+    P1 = I332(1, 5, 4);
+    P3 = I521(4, 5, 7);
+  }
+} break;
+case 147 : 
+case 179 : 
+{
+  P0 = I31(4, 3);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 148 : 
+case 149 : 
+case 180 : 
+case 181 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+} break;
+case 150 : 
+case 182 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+    P3 = I31(4, 7);
+  } else {
+    P1 = I332(1, 5, 4);
+    P3 = I521(4, 5, 7);
+  }
+} break;
+case 151 : 
+case 183 : 
+{
+  P0 = I31(4, 3);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 152 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 153 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 154 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 155 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 156 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 157 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 158 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 159 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 184 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 185 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 186 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 187 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P2 = I31(4, 7);
+  } else {
+    P0 = I332(1, 3, 4);
+    P2 = I521(4, 3, 7);
+  }
+} break;
+case 188 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 189 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 190 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 7);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+    P3 = I31(4, 7);
+  } else {
+    P1 = I332(1, 5, 4);
+    P3 = I521(4, 5, 7);
+  }
+} break;
+case 191 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 192 : 
+case 193 : 
+case 196 : 
+case 197 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 194 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 195 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 198 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 199 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 200 : 
+case 204 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+    P3 = I31(4, 5);
+  } else {
+    P2 = I332(3, 7, 4);
+    P3 = I521(4, 7, 5);
+  }
+} break;
+case 201 : 
+case 205 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+} break;
+case 202 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 203 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 206 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 207 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P1 = I31(4, 5);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I521(4, 1, 5);
+  }
+} break;
+case 208 : 
+case 209 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 210 : 
+case 216 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 211 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 212 : 
+case 213 : 
+{
+  P0 = I211(4, 1, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P1 = I31(4, 1);
+    P3 = IC(4);
+  } else {
+    P1 = I521(4, 5, 1);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 215 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 217 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 218 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 219 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 220 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 221 : 
+{
+  P0 = I31(4, 1);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P1 = I31(4, 1);
+    P3 = IC(4);
+  } else {
+    P1 = I521(4, 5, 1);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 223 : 
+{
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 224 : 
+case 225 : 
+case 228 : 
+case 229 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 226 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 227 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 230 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 231 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 232 : 
+case 236 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+    P3 = I31(4, 5);
+  } else {
+    P2 = I332(3, 7, 4);
+    P3 = I521(4, 7, 5);
+  }
+} break;
+case 233 : 
+case 237 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+} break;
+case 234 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 235 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 238 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+    P3 = I31(4, 5);
+  } else {
+    P2 = I332(3, 7, 4);
+    P3 = I521(4, 7, 5);
+  }
+} break;
+case 239 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 240 : 
+case 241 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  if (HQ2X_MDR) {
+    P2 = I31(4, 3);
+    P3 = IC(4);
+  } else {
+    P2 = I521(4, 7, 3);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 242 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 243 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  if (HQ2X_MDR) {
+    P2 = I31(4, 3);
+    P3 = IC(4);
+  } else {
+    P2 = I521(4, 7, 3);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 244 : 
+case 245 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+} break;
+case 246 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 247 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 249 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 251 : 
+{
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 252 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+} break;
+case 253 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+} break;
+case 254 : 
+{
+  P0 = I31(4, 0);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 255 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
diff --git a/Source/GlideHQ/TextureFilters_hq4x.cpp b/Source/GlideHQ/TextureFilters_hq4x.cpp
new file mode 100644
index 000000000..89c14ea21
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_hq4x.cpp
@@ -0,0 +1,892 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*  Based on Maxim Stepin and Rice1964 hq4x code */
+
+#include <math.h>
+#include <stdlib.h>
+#include "TextureFilters.h"
+
+#if !_16BPP_HACK
+static uint32 RGB444toYUV[4096];   /* packed-YUV lookup table, one entry per 12-bit RGB value; NOTE(review): no code filling it is visible in this part of the file - confirm it is initialised before use */
+#define RGB444toYUV(val) RGB444toYUV[(val) & 0x0FFF]   /* val = ARGB4444; the mask drops the alpha nibble. The argument is parenthesised so compound expressions index correctly. */
+
+/*inline static uint32 RGB444toYUV(uint32 val)
+{
+  uint32 r, g, b, Y, u, v;
+
+  r = (val & 0x0F00) >> 4;
+  g = (val & 0x00F0);
+  b = val & 0x000F;
+  r |= r >> 4;
+  g |= g >> 4;
+  b |= b << 4;
+
+  Y = (r + g + b) >> 2;
+  u = 128 + ((r - b) >> 2);
+  v = 128 + ((2*g - r - b)>>3);
+
+  return ((Y << 16) | (u << 8) | v);
+}*/
+
+static uint32 RGB555toYUV(uint32 val)
+{ /* Converts an RGB555 pixel (bit 15 is ignored) to packed YUV: Y in bits 16-23, u in bits 8-15, v in bits 0-7. */
+  uint32 r, g, b, Y, u, v;
+  /* Widen every 5-bit channel to 8 bits; the upper bits are repeated in the low 3 bits. */
+  r = (val & 0x7C00) >> 7;
+  g = (val & 0x03E0) >> 2;
+  b = (val & 0x001F) << 3;
+  r |= r >> 5;
+  g |= g >> 5;
+  b |= b >> 5;
+  /* The 128 bias is folded in before shifting (0x200 >> 2, 0x400 >> 3), as RGB888toYUV does: the unsigned operands can then never wrap, so no stray high bits reach the result. */
+  Y = (r + g + b) >> 2;
+  u = (0x00000200 + r - b) >> 2;
+  v = (0x00000400 + (g << 1) - r - b) >> 3;
+
+  return ((Y << 16) | (u << 8) | v);
+}
+
+static uint32 RGB565toYUV(uint32 val)
+{ /* Converts an RGB565 pixel to packed YUV: Y in bits 16-23, u in bits 8-15, v in bits 0-7. */
+  uint32 r, g, b, Y, u, v;
+  /* Widen red/blue (5 bits) and green (6 bits) to 8 bits; the upper bits are repeated in the freed low bits. */
+  r = (val & 0xF800) >> 8;
+  g = (val & 0x07E0) >> 3;
+  b = (val & 0x001F) << 3;
+  r |= r >> 5;
+  g |= g >> 6;
+  b |= b >> 5;
+  /* The 128 bias is folded in before shifting (0x200 >> 2, 0x400 >> 3), as RGB888toYUV does: the unsigned operands can then never wrap, so no stray high bits reach the result. */
+  Y = (r + g + b) >> 2;
+  u = (0x00000200 + r - b) >> 2;
+  v = (0x00000400 + (g << 1) - r - b) >> 3;
+
+  return ((Y << 16) | (u << 8) | v);
+}
+#endif /* !_16BPP_HACK */
+
+static uint32 RGB888toYUV(uint32 val)
+{ /* Converts a 0x..RRGGBB pixel to packed YUV: Y in bits 16-23, u in bits 8-15, v in bits 0-7. */
+#if 0 /* disabled: x86 inline-assembly variant, never compiled */
+  uint32 Yuv;
+
+  __asm {
+    mov eax, dword ptr [val];
+    mov ebx, eax;
+    mov ecx, eax;
+    and ebx, 0x000000ff; // b
+    and eax, 0x00ff0000; // r
+    and ecx, 0x0000ff00; // g
+    shl ebx, 14;
+    shr eax, 2;
+    shl ecx, 6;
+    mov edx, ebx;
+    add edx, eax;
+    add edx, ecx;
+    and edx, 0xffff0000;
+
+    sub eax, ebx;
+    add eax, 0x00800000;
+    shr eax, 8;
+    or  edx, eax;
+    sub eax, 0x00800000;
+    and edx, 0xffffff00;
+
+    add ecx, 0x00800000;
+    shr ecx, 5;
+    shr ebx, 7;
+    add eax, ebx;
+    sub ecx, eax;
+    shr ecx, 11;
+    or  edx, ecx;
+
+    mov dword ptr [Yuv], edx;
+  }
+
+  return Yuv;
+#else
+  uint32 r, g, b, Y, u, v;
+  /* Split the pixel into its three 8-bit colour channels; the top byte of val is not used. */
+  r = (val & 0x00ff0000) >> 16;
+  g = (val & 0x0000ff00) >> 8;
+  b = val & 0x000000ff;
+  /* Y = (r+g+b)/4, u = 128 + (r-b)/4, v = 128 + (2g-r-b)/8; the bias is added before the shift so the unsigned math never goes negative. */
+  Y = (r + g + b) >> 2;
+  u = (0x00000200 + r - b) >> 2;
+  v = (0x00000400 + (g << 1) - r - b) >> 3;
+
+  return ((Y << 16) | (u << 8) | v);
+#endif
+}
+
+#define Ymask 0x00FF0000   /* Y byte of a packed 0x00YYUUVV value */
+#define Umask 0x0000FF00   /* U byte */
+#define Vmask 0x000000FF   /* V byte */
+#define trY 0x00300000 // two pixels count as different when their Y bytes differ by more than 0x30 (kept in the Y byte position)
+#define trU 0x00000700 // same test for U: threshold 0x07, kept in the U byte position
+#define trV 0x00000006 // same test for V: threshold 0x06
+
+#define HQ4X_INTERP1(n, b) \
+static void hq4x_Interp1_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /* Stores (p1*3 + p2) / 4 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are weighted separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) = INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*3 + INTERP_##n##_MASK_1_3(p2)) / 4) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*3 + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 4 ); \
+}
+
+#define HQ4X_INTERP2(n, b) \
+static void hq4x_Interp2_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
+{ \
+  /* Stores (p1*2 + p2 + p3) / 4 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are weighted separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*2 + INTERP_##n##_MASK_1_3(p2) + INTERP_##n##_MASK_1_3(p3)) / 4) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*2 + INTERP_##n##_MASK_SHIFT_2_4(p2) + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 4); \
+}
+
+#define HQ4X_INTERP3(n, b) \
+static void hq4x_Interp3_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /* Stores (p1*7 + p2) / 8 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are weighted separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*7 + INTERP_##n##_MASK_1_3(p2)) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*7 + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 8); \
+}
+
+#define HQ4X_INTERP5(n, b) \
+static void hq4x_Interp5_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /* Stores (p1 + p2) / 2 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are averaged separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1) + INTERP_##n##_MASK_1_3(p2)) / 2) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1) + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 2); \
+}
+
+#define HQ4X_INTERP6(n, b) \
+static void hq4x_Interp6_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
+{ \
+  /* Stores (p1*5 + p2*2 + p3) / 8 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are weighted separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*5 + INTERP_##n##_MASK_1_3(p2)*2 + INTERP_##n##_MASK_1_3(p3)) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*5 + INTERP_##n##_MASK_SHIFT_2_4(p2)*2 + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 8); \
+}
+
+#define HQ4X_INTERP7(n, b) \
+static void hq4x_Interp7_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
+{ \
+  /* Stores (p1*6 + p2 + p3) / 8 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are weighted separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) =   INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*6 + INTERP_##n##_MASK_1_3(p2) + INTERP_##n##_MASK_1_3(p3)) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*6 + INTERP_##n##_MASK_SHIFT_2_4(p2) + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 8); \
+}
+
+#define HQ4X_INTERP8(n, b) \
+static void hq4x_Interp8_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /* Stores (p1*5 + p2*3) / 8 at pc, blended channel by channel: the MASK_1_3 channels and the shifted MASK_SHIFT_2_4 channels are weighted separately and then merged. n selects the format macros, b is the pixel width in bits. */ \
+  *((uint##b*)pc) =   INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*5 + INTERP_##n##_MASK_1_3(p2)*3) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*5 + INTERP_##n##_MASK_SHIFT_2_4(p2)*3) / 8); \
+}
+
+#if !_16BPP_HACK
+#define INTERP_4444_MASK_1_3(v)           ((v) & 0x0F0F)                 /* nibbles at bits 0-3 and 8-11, left in place; each gets 4 spare bits above it for the weighted sums */
+#define INTERP_4444_MASK_SHIFT_2_4(v)     (((v) & 0xF0F0) >> 4)          /* nibbles at bits 4-7 and 12-15, moved down into those same two slots */
+#define INTERP_4444_MASK_SHIFTBACK_2_4(v) (INTERP_4444_MASK_1_3(v) << 4) /* trim a blended pair back to 4 bits per channel and return it to bits 4-7 / 12-15 */
+HQ4X_INTERP1(4444, 16)   /* defines hq4x_Interp1_4444 */
+HQ4X_INTERP2(4444, 16)   /* defines hq4x_Interp2_4444 */
+HQ4X_INTERP3(4444, 16)   /* defines hq4x_Interp3_4444 */
+HQ4X_INTERP5(4444, 16)   /* defines hq4x_Interp5_4444 */
+HQ4X_INTERP6(4444, 16)   /* defines hq4x_Interp6_4444 */
+HQ4X_INTERP7(4444, 16)   /* defines hq4x_Interp7_4444 */
+HQ4X_INTERP8(4444, 16)   /* defines hq4x_Interp8_4444 */
+
+#define INTERP_1555_MASK_1_3(v)           ((v) & 0x7C1F)                 /* the 5-bit fields at bits 10-14 and 0-4, left in place */
+#define INTERP_1555_MASK_SHIFT_2_4(v)     (((v) & 0x83E0) >> 5)          /* bit 15 and the 5-bit field at bits 5-9, moved down to bit 10 and bits 0-4 */
+#define INTERP_1555_MASK_SHIFTBACK_2_4(v) (INTERP_1555_MASK_1_3(v) << 5) /* trim the blended pair (0x7C1F covers bit 10 and bits 0-4) and return it to bit 15 / bits 5-9 */
+HQ4X_INTERP1(1555, 16)   /* defines hq4x_Interp1_1555 */
+HQ4X_INTERP2(1555, 16)   /* defines hq4x_Interp2_1555 */
+HQ4X_INTERP3(1555, 16)   /* defines hq4x_Interp3_1555 */
+HQ4X_INTERP5(1555, 16)   /* defines hq4x_Interp5_1555 */
+HQ4X_INTERP6(1555, 16)   /* defines hq4x_Interp6_1555 */
+HQ4X_INTERP7(1555, 16)   /* defines hq4x_Interp7_1555 */
+HQ4X_INTERP8(1555, 16)   /* defines hq4x_Interp8_1555 */
+
+#define INTERP_565_MASK_1_3(v)           ((v) & 0xF81F)          /* the 5-bit fields at bits 11-15 and 0-4, left in place */
+#define INTERP_565_MASK_SHIFT_2_4(v)     (((v) & 0x7E0) >> 5)    /* the 6-bit field at bits 5-10, moved down to bits 0-5 */
+#define INTERP_565_MASK_SHIFTBACK_2_4(v) (((v) & 0x003F) << 5)   /* keep all 6 bits of the blended middle field before moving it back; masking with 0xF81F here would cut its top bit */
+HQ4X_INTERP1(565, 16)   /* defines hq4x_Interp1_565 */
+HQ4X_INTERP2(565, 16)   /* defines hq4x_Interp2_565 */
+HQ4X_INTERP3(565, 16)   /* defines hq4x_Interp3_565 */
+HQ4X_INTERP5(565, 16)   /* defines hq4x_Interp5_565 */
+HQ4X_INTERP6(565, 16)   /* defines hq4x_Interp6_565 */
+HQ4X_INTERP7(565, 16)   /* defines hq4x_Interp7_565 */
+HQ4X_INTERP8(565, 16)   /* defines hq4x_Interp8_565 */
+#endif /* !_16BPP_HACK */
+
+#define INTERP_8888_MASK_1_3(v)           ((v) & 0x00FF00FF)             /* bytes at bits 0-7 and 16-23, left in place; each gets 8 spare bits above it for the weighted sums */
+#define INTERP_8888_MASK_SHIFT_2_4(v)     (((v) & 0xFF00FF00) >> 8)      /* bytes at bits 8-15 and 24-31, moved down into those same two slots */
+#define INTERP_8888_MASK_SHIFTBACK_2_4(v) (INTERP_8888_MASK_1_3(v) << 8) /* trim a blended pair back to 8 bits per channel and return it to bits 8-15 / 24-31 */
+HQ4X_INTERP1(8888, 32)   /* defines hq4x_Interp1_8888 */
+HQ4X_INTERP2(8888, 32)   /* defines hq4x_Interp2_8888 */
+HQ4X_INTERP3(8888, 32)   /* defines hq4x_Interp3_8888 */
+HQ4X_INTERP5(8888, 32)   /* defines hq4x_Interp5_8888 */
+HQ4X_INTERP6(8888, 32)   /* defines hq4x_Interp6_8888 */
+HQ4X_INTERP7(8888, 32)   /* defines hq4x_Interp7_8888 */
+HQ4X_INTERP8(8888, 32)   /* defines hq4x_Interp8_8888 */
+
+#define HQ4X_STORE_C5(p) ((sizeof(c[5]) == sizeof(uint16)) ? (void)(*((uint16*)(p)) = (uint16)c[5]) : (void)(*((uint32*)(p)) = (uint32)c[5]))   /* copies c[5] to p at the width of c[]; a plain int store wrote 4 bytes even for 16-bit pixels, overwriting the next pixel */
+#define PIXEL00_0     HQ4X_STORE_C5(pOut);   /* PIXELrc_k: r = output row (BpL steps), c = output column (BPP steps); first digit of k = hq4x_InterpN used, k == 0 = plain copy of c[5] */
+#define PIXEL00_11    hq4x_Interp1(pOut, c[5], c[4]);
+#define PIXEL00_12    hq4x_Interp1(pOut, c[5], c[2]);
+#define PIXEL00_20    hq4x_Interp2(pOut, c[5], c[2], c[4]);
+#define PIXEL00_50    hq4x_Interp5(pOut, c[2], c[4]);
+#define PIXEL00_80    hq4x_Interp8(pOut, c[5], c[1]);
+#define PIXEL00_81    hq4x_Interp8(pOut, c[5], c[4]);
+#define PIXEL00_82    hq4x_Interp8(pOut, c[5], c[2]);
+#define PIXEL01_0     HQ4X_STORE_C5(pOut+BPP);   /* row 0, column 1 */
+#define PIXEL01_10    hq4x_Interp1(pOut+BPP, c[5], c[1]);
+#define PIXEL01_12    hq4x_Interp1(pOut+BPP, c[5], c[2]);
+#define PIXEL01_14    hq4x_Interp1(pOut+BPP, c[2], c[5]);
+#define PIXEL01_21    hq4x_Interp2(pOut+BPP, c[2], c[5], c[4]);
+#define PIXEL01_31    hq4x_Interp3(pOut+BPP, c[5], c[4]);
+#define PIXEL01_50    hq4x_Interp5(pOut+BPP, c[2], c[5]);
+#define PIXEL01_60    hq4x_Interp6(pOut+BPP, c[5], c[2], c[4]);
+#define PIXEL01_61    hq4x_Interp6(pOut+BPP, c[5], c[2], c[1]);
+#define PIXEL01_82    hq4x_Interp8(pOut+BPP, c[5], c[2]);
+#define PIXEL01_83    hq4x_Interp8(pOut+BPP, c[2], c[4]);
+#define PIXEL02_0     HQ4X_STORE_C5(pOut+BPP2);   /* row 0, column 2 */
+#define PIXEL02_10    hq4x_Interp1(pOut+BPP2, c[5], c[3]);
+#define PIXEL02_11    hq4x_Interp1(pOut+BPP2, c[5], c[2]);
+#define PIXEL02_13    hq4x_Interp1(pOut+BPP2, c[2], c[5]);
+#define PIXEL02_21    hq4x_Interp2(pOut+BPP2, c[2], c[5], c[6]);
+#define PIXEL02_32    hq4x_Interp3(pOut+BPP2, c[5], c[6]);
+#define PIXEL02_50    hq4x_Interp5(pOut+BPP2, c[2], c[5]);
+#define PIXEL02_60    hq4x_Interp6(pOut+BPP2, c[5], c[2], c[6]);
+#define PIXEL02_61    hq4x_Interp6(pOut+BPP2, c[5], c[2], c[3]);
+#define PIXEL02_81    hq4x_Interp8(pOut+BPP2, c[5], c[2]);
+#define PIXEL02_83    hq4x_Interp8(pOut+BPP2, c[2], c[6]);
+#define PIXEL03_0     HQ4X_STORE_C5(pOut+BPP3);   /* row 0, column 3 */
+#define PIXEL03_11    hq4x_Interp1(pOut+BPP3, c[5], c[2]);
+#define PIXEL03_12    hq4x_Interp1(pOut+BPP3, c[5], c[6]);
+#define PIXEL03_20    hq4x_Interp2(pOut+BPP3, c[5], c[2], c[6]);
+#define PIXEL03_50    hq4x_Interp5(pOut+BPP3, c[2], c[6]);
+#define PIXEL03_80    hq4x_Interp8(pOut+BPP3, c[5], c[3]);
+#define PIXEL03_81    hq4x_Interp8(pOut+BPP3, c[5], c[2]);
+#define PIXEL03_82    hq4x_Interp8(pOut+BPP3, c[5], c[6]);
+#define PIXEL10_0     HQ4X_STORE_C5(pOut+BpL);   /* row 1, column 0 */
+#define PIXEL10_10    hq4x_Interp1(pOut+BpL, c[5], c[1]);
+#define PIXEL10_11    hq4x_Interp1(pOut+BpL, c[5], c[4]);
+#define PIXEL10_13    hq4x_Interp1(pOut+BpL, c[4], c[5]);
+#define PIXEL10_21    hq4x_Interp2(pOut+BpL, c[4], c[5], c[2]);
+#define PIXEL10_32    hq4x_Interp3(pOut+BpL, c[5], c[2]);
+#define PIXEL10_50    hq4x_Interp5(pOut+BpL, c[4], c[5]);
+#define PIXEL10_60    hq4x_Interp6(pOut+BpL, c[5], c[4], c[2]);
+#define PIXEL10_61    hq4x_Interp6(pOut+BpL, c[5], c[4], c[1]);
+#define PIXEL10_81    hq4x_Interp8(pOut+BpL, c[5], c[4]);
+#define PIXEL10_83    hq4x_Interp8(pOut+BpL, c[4], c[2]);
+#define PIXEL11_0     HQ4X_STORE_C5(pOut+BpL+BPP);   /* row 1, column 1 */
+#define PIXEL11_30    hq4x_Interp3(pOut+BpL+BPP, c[5], c[1]);
+#define PIXEL11_31    hq4x_Interp3(pOut+BpL+BPP, c[5], c[4]);
+#define PIXEL11_32    hq4x_Interp3(pOut+BpL+BPP, c[5], c[2]);
+#define PIXEL11_70    hq4x_Interp7(pOut+BpL+BPP, c[5], c[4], c[2]);
+#define PIXEL12_0     HQ4X_STORE_C5(pOut+BpL+BPP2);   /* row 1, column 2 */
+#define PIXEL12_30    hq4x_Interp3(pOut+BpL+BPP2, c[5], c[3]);
+#define PIXEL12_31    hq4x_Interp3(pOut+BpL+BPP2, c[5], c[2]);
+#define PIXEL12_32    hq4x_Interp3(pOut+BpL+BPP2, c[5], c[6]);
+#define PIXEL12_70    hq4x_Interp7(pOut+BpL+BPP2, c[5], c[6], c[2]);
+#define PIXEL13_0     HQ4X_STORE_C5(pOut+BpL+BPP3);   /* row 1, column 3 */
+#define PIXEL13_10    hq4x_Interp1(pOut+BpL+BPP3, c[5], c[3]);
+#define PIXEL13_12    hq4x_Interp1(pOut+BpL+BPP3, c[5], c[6]);
+#define PIXEL13_14    hq4x_Interp1(pOut+BpL+BPP3, c[6], c[5]);
+#define PIXEL13_21    hq4x_Interp2(pOut+BpL+BPP3, c[6], c[5], c[2]);
+#define PIXEL13_31    hq4x_Interp3(pOut+BpL+BPP3, c[5], c[2]);
+#define PIXEL13_50    hq4x_Interp5(pOut+BpL+BPP3, c[6], c[5]);
+#define PIXEL13_60    hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[2]);
+#define PIXEL13_61    hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[3]);
+#define PIXEL13_82    hq4x_Interp8(pOut+BpL+BPP3, c[5], c[6]);
+#define PIXEL13_83    hq4x_Interp8(pOut+BpL+BPP3, c[6], c[2]);
+#define PIXEL20_0     HQ4X_STORE_C5(pOut+BpL+BpL);   /* row 2, column 0 */
+#define PIXEL20_10    hq4x_Interp1(pOut+BpL+BpL, c[5], c[7]);
+#define PIXEL20_12    hq4x_Interp1(pOut+BpL+BpL, c[5], c[4]);
+#define PIXEL20_14    hq4x_Interp1(pOut+BpL+BpL, c[4], c[5]);
+#define PIXEL20_21    hq4x_Interp2(pOut+BpL+BpL, c[4], c[5], c[8]);
+#define PIXEL20_31    hq4x_Interp3(pOut+BpL+BpL, c[5], c[8]);
+#define PIXEL20_50    hq4x_Interp5(pOut+BpL+BpL, c[4], c[5]);
+#define PIXEL20_60    hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[8]);
+#define PIXEL20_61    hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[7]);
+#define PIXEL20_82    hq4x_Interp8(pOut+BpL+BpL, c[5], c[4]);
+#define PIXEL20_83    hq4x_Interp8(pOut+BpL+BpL, c[4], c[8]);
+#define PIXEL21_0     HQ4X_STORE_C5(pOut+BpL+BpL+BPP);   /* row 2, column 1 */
+#define PIXEL21_30    hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[7]);
+#define PIXEL21_31    hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[8]);
+#define PIXEL21_32    hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[4]);
+#define PIXEL21_70    hq4x_Interp7(pOut+BpL+BpL+BPP, c[5], c[4], c[8]);
+#define PIXEL22_0     HQ4X_STORE_C5(pOut+BpL+BpL+BPP2);   /* row 2, column 2 */
+#define PIXEL22_30    hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[9]);
+#define PIXEL22_31    hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[6]);
+#define PIXEL22_32    hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[8]);
+#define PIXEL22_70    hq4x_Interp7(pOut+BpL+BpL+BPP2, c[5], c[6], c[8]);
+#define PIXEL23_0     HQ4X_STORE_C5(pOut+BpL+BpL+BPP3);   /* row 2, column 3 */
+#define PIXEL23_10    hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[9]);
+#define PIXEL23_11    hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL23_13    hq4x_Interp1(pOut+BpL+BpL+BPP3, c[6], c[5]);
+#define PIXEL23_21    hq4x_Interp2(pOut+BpL+BpL+BPP3, c[6], c[5], c[8]);
+#define PIXEL23_32    hq4x_Interp3(pOut+BpL+BpL+BPP3, c[5], c[8]);
+#define PIXEL23_50    hq4x_Interp5(pOut+BpL+BpL+BPP3, c[6], c[5]);
+#define PIXEL23_60    hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[8]);
+#define PIXEL23_61    hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[9]);
+#define PIXEL23_81    hq4x_Interp8(pOut+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL23_83    hq4x_Interp8(pOut+BpL+BpL+BPP3, c[6], c[8]);
+#define PIXEL30_0     HQ4X_STORE_C5(pOut+BpL+BpL+BpL);   /* row 3, column 0 */
+#define PIXEL30_11    hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[8]);
+#define PIXEL30_12    hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[4]);
+#define PIXEL30_20    hq4x_Interp2(pOut+BpL+BpL+BpL, c[5], c[8], c[4]);
+#define PIXEL30_50    hq4x_Interp5(pOut+BpL+BpL+BpL, c[8], c[4]);
+#define PIXEL30_80    hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[7]);
+#define PIXEL30_81    hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[8]);
+#define PIXEL30_82    hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[4]);
+#define PIXEL31_0     HQ4X_STORE_C5(pOut+BpL+BpL+BpL+BPP);   /* row 3, column 1 */
+#define PIXEL31_10    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[7]);
+#define PIXEL31_11    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
+#define PIXEL31_13    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
+#define PIXEL31_21    hq4x_Interp2(pOut+BpL+BpL+BpL+BPP, c[8], c[5], c[4]);
+#define PIXEL31_32    hq4x_Interp3(pOut+BpL+BpL+BpL+BPP, c[5], c[4]);
+#define PIXEL31_50    hq4x_Interp5(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
+#define PIXEL31_60    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[4]);
+#define PIXEL31_61    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[7]);
+#define PIXEL31_81    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
+#define PIXEL31_83    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[8], c[4]);
+#define PIXEL32_0     HQ4X_STORE_C5(pOut+BpL+BpL+BpL+BPP2);   /* row 3, column 2 */
+#define PIXEL32_10    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[9]);
+#define PIXEL32_12    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
+#define PIXEL32_14    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
+#define PIXEL32_21    hq4x_Interp2(pOut+BpL+BpL+BpL+BPP2, c[8], c[5], c[6]);
+#define PIXEL32_31    hq4x_Interp3(pOut+BpL+BpL+BpL+BPP2, c[5], c[6]);
+#define PIXEL32_50    hq4x_Interp5(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
+#define PIXEL32_60    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[6]);
+#define PIXEL32_61    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[9]);
+#define PIXEL32_82    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
+#define PIXEL32_83    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[8], c[6]);
+#define PIXEL33_0     HQ4X_STORE_C5(pOut+BpL+BpL+BpL+BPP3);   /* row 3, column 3 */
+#define PIXEL33_11    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL33_12    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
+#define PIXEL33_20    hq4x_Interp2(pOut+BpL+BpL+BpL+BPP3, c[5], c[8], c[6]);
+#define PIXEL33_50    hq4x_Interp5(pOut+BpL+BpL+BpL+BPP3, c[8], c[6]);
+#define PIXEL33_80    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[9]);
+#define PIXEL33_81    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL33_82    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
+#define HQ4X_DIFF(n, b) \
+/* Generates Diff_<n>(w1, w2) for uint<b> pixels: 1 when the two pixels differ by more than trY, trU or trV in the matching YUV channel, otherwise 0. */ \
+static int Diff_##n (uint##b w1, uint##b w2) \
+{ \
+  const int yuvA = RGB##n##toYUV(w1); \
+  const int yuvB = RGB##n##toYUV(w2); \
+  if (abs((yuvA & Ymask) - (yuvB & Ymask)) > trY) return 1; \
+  if (abs((yuvA & Umask) - (yuvB & Umask)) > trU) return 1; \
+  return (abs((yuvA & Vmask) - (yuvB & Vmask)) > trV) ? 1 : 0; \
+}
+
+HQ4X_DIFF(888, 32)
+
+#if !_16BPP_HACK
+HQ4X_DIFF(444, 16)   /* defines Diff_444; its RGB444toYUV(...) calls expand to the lookup-table macro */
+HQ4X_DIFF(555, 16)   /* defines Diff_555 on top of RGB555toYUV() */
+HQ4X_DIFF(565, 16)   /* defines Diff_565 on top of RGB565toYUV() */
+
+void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{ /* hq4x pass over 16-bit ARGB4444 pixels: visits Xres x Yres source pixels at pIn and moves pOut on by a 4x4-pixel cell for each of them. */
+#define hq4x_Interp1 hq4x_Interp1_4444
+#define hq4x_Interp2 hq4x_Interp2_4444
+#define hq4x_Interp3 hq4x_Interp3_4444
+#define hq4x_Interp4 hq4x_Interp4_4444 /* NOTE(review): none of the HQ4X_INTERP generators in this file defines an Interp4 function */
+#define hq4x_Interp5 hq4x_Interp5_4444
+#define hq4x_Interp6 hq4x_Interp6_4444
+#define hq4x_Interp7 hq4x_Interp7_4444
+#define hq4x_Interp8 hq4x_Interp8_4444
+#define Diff Diff_444 /* presumably consumed by the included header - confirm there */
+#define BPP   2 /* bytes per output pixel; BPP2/BPP3 are the byte offsets of columns 2 and 3 in the PIXEL macros */
+#define BPP2  4
+#define BPP3  6
+  /* SrcPPL is the source row pitch in pixels (rows are SrcPPL*2 bytes apart); BpL is the output row pitch in bytes. */
+  int  i, j, k;
+  int  prevline, nextline; /* byte offsets from pIn to the row above / below */
+  uint16  w[10]; /* 3x3 neighbourhood, indices 1..9 as in the diagram below; w[0] is unused */
+  uint16  c[10]; /* copy of w[]; the PIXEL macros read their colours from c[] */
+
+  int pattern; /* one bit per neighbour that differs from the centre pixel */
+  int flag; /* bit belonging to the neighbour currently tested */
+
+  int YUV1, YUV2; /* packed YUV of the centre pixel / of the current neighbour */
+
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w1 | w2 | w3 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w4 | w5 | w6 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w7 | w8 | w9 |
+  //   +----+----+----+
+
+  for (j = 0; j < Yres; j++) {
+    if (j>0)      prevline = -SrcPPL*2; else prevline = 0; /* top row: the current row stands in for the missing one */
+    if (j<Yres-1) nextline =  SrcPPL*2; else nextline = 0; /* bottom row: same replacement */
+
+    for (i=0; i<Xres; i++) {
+      w[2] = *((uint16*)(pIn + prevline));
+      w[5] = *((uint16*)pIn);
+      w[8] = *((uint16*)(pIn + nextline));
+
+      if (i>0) {
+        w[1] = *((uint16*)(pIn + prevline - 2));
+        w[4] = *((uint16*)(pIn - 2));
+        w[7] = *((uint16*)(pIn + nextline - 2));
+      } else { /* left edge: repeat the centre column */
+        w[1] = w[2];
+        w[4] = w[5];
+        w[7] = w[8];
+      }
+
+      if (i<Xres-1) {
+        w[3] = *((uint16*)(pIn + prevline + 2));
+        w[6] = *((uint16*)(pIn + 2));
+        w[9] = *((uint16*)(pIn + nextline + 2));
+      }   else { /* right edge: repeat the centre column */
+        w[3] = w[2];
+        w[6] = w[5];
+        w[9] = w[8];
+      }
+
+      pattern = 0;
+      flag = 1;
+
+      YUV1 = RGB444toYUV(w[5]);
+      /* Test the 8 neighbours in the order w1,w2,w3,w4,w6,w7,w8,w9; they map to pattern bits 0..7. */
+      for (k=1; k<=9; k++) {
+        if (k==5) continue;
+
+        if ( w[k] != w[5] ) { /* equal raw pixels give equal YUV, so only unequal ones are converted */
+          YUV2 = RGB444toYUV(w[k]);
+          if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+               ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+               ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+            pattern |= flag;
+        }
+        flag <<= 1;
+      }
+
+      for (k=1; k<=9; k++)
+        c[k] = w[k];
+      /* The header below is pasted in as code at this point; its contents are not part of this file. */
+#include "TextureFilters_hq4x.h"
+
+      pIn+=2; /* next source pixel */
+      pOut+=8; /* next output cell: 4 pixels of 2 bytes */
+    }
+    pIn += 2*(SrcPPL-Xres); /* skip the rest of the source row when SrcPPL > Xres */
+    pOut+= 8*(SrcPPL-Xres); /* matching skip on the output side */
+    pOut+=BpL; /* three more output rows; NOTE(review): this lands on the next 4-row band only if BpL == 8*SrcPPL - confirm with the caller */
+    pOut+=BpL;
+    pOut+=BpL;
+  }
+
+#undef BPP
+#undef BPP2
+#undef BPP3
+#undef Diff
+#undef hq4x_Interp1
+#undef hq4x_Interp2
+#undef hq4x_Interp3
+#undef hq4x_Interp4
+#undef hq4x_Interp5
+#undef hq4x_Interp6
+#undef hq4x_Interp7
+#undef hq4x_Interp8
+}
+
+void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{ /* hq4x pass over 16-bit 1555 pixels: visits Xres x Yres source pixels at pIn and moves pOut on by a 4x4-pixel cell for each of them. */
+#define hq4x_Interp1 hq4x_Interp1_1555
+#define hq4x_Interp2 hq4x_Interp2_1555
+#define hq4x_Interp3 hq4x_Interp3_1555
+#define hq4x_Interp4 hq4x_Interp4_1555 /* NOTE(review): none of the HQ4X_INTERP generators in this file defines an Interp4 function */
+#define hq4x_Interp5 hq4x_Interp5_1555
+#define hq4x_Interp6 hq4x_Interp6_1555
+#define hq4x_Interp7 hq4x_Interp7_1555
+#define hq4x_Interp8 hq4x_Interp8_1555
+#define Diff Diff_555 /* presumably consumed by the included header - confirm there */
+#define BPP   2 /* bytes per output pixel; BPP2/BPP3 are the byte offsets of columns 2 and 3 in the PIXEL macros */
+#define BPP2  4
+#define BPP3  6
+  /* SrcPPL is the source row pitch in pixels (rows are SrcPPL*2 bytes apart); BpL is the output row pitch in bytes. */
+  int  i, j, k;
+  int  prevline, nextline; /* byte offsets from pIn to the row above / below */
+  uint16  w[10]; /* 3x3 neighbourhood, indices 1..9 as in the diagram below; w[0] is unused */
+  uint16  c[10]; /* copy of w[]; the PIXEL macros read their colours from c[] */
+
+  int pattern; /* one bit per neighbour that differs from the centre pixel */
+  int flag; /* bit belonging to the neighbour currently tested */
+
+  int YUV1, YUV2; /* packed YUV of the centre pixel / of the current neighbour */
+
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w1 | w2 | w3 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w4 | w5 | w6 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w7 | w8 | w9 |
+  //   +----+----+----+
+
+  for (j = 0; j < Yres; j++) {
+    if (j>0)      prevline = -SrcPPL*2; else prevline = 0; /* top row: the current row stands in for the missing one */
+    if (j<Yres-1) nextline =  SrcPPL*2; else nextline = 0; /* bottom row: same replacement */
+
+    for (i=0; i<Xres; i++) {
+      w[2] = *((uint16*)(pIn + prevline));
+      w[5] = *((uint16*)pIn);
+      w[8] = *((uint16*)(pIn + nextline));
+
+      if (i>0) {
+        w[1] = *((uint16*)(pIn + prevline - 2));
+        w[4] = *((uint16*)(pIn - 2));
+        w[7] = *((uint16*)(pIn + nextline - 2));
+      } else { /* left edge: repeat the centre column */
+        w[1] = w[2];
+        w[4] = w[5];
+        w[7] = w[8];
+      }
+
+      if (i<Xres-1) {
+        w[3] = *((uint16*)(pIn + prevline + 2));
+        w[6] = *((uint16*)(pIn + 2));
+        w[9] = *((uint16*)(pIn + nextline + 2));
+      }   else { /* right edge: repeat the centre column */
+        w[3] = w[2];
+        w[6] = w[5];
+        w[9] = w[8];
+      }
+
+      pattern = 0;
+      flag = 1;
+
+      YUV1 = RGB555toYUV(w[5]);
+      /* Test the 8 neighbours in the order w1,w2,w3,w4,w6,w7,w8,w9; they map to pattern bits 0..7. */
+      for (k=1; k<=9; k++) {
+        if (k==5) continue;
+
+        if ( w[k] != w[5] ) { /* equal raw pixels give equal YUV, so only unequal ones are converted */
+          YUV2 = RGB555toYUV(w[k]);
+          if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+               ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+               ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+            pattern |= flag;
+        }
+        flag <<= 1;
+      }
+
+      for (k=1; k<=9; k++)
+        c[k] = w[k];
+      /* The header below is pasted in as code at this point; its contents are not part of this file. */
+#include "TextureFilters_hq4x.h"
+
+      pIn+=2; /* next source pixel */
+      pOut+=8; /* next output cell: 4 pixels of 2 bytes */
+    }
+    pIn += 2*(SrcPPL-Xres); /* skip the rest of the source row when SrcPPL > Xres */
+    pOut+= 8*(SrcPPL-Xres); /* matching skip on the output side */
+    pOut+=BpL; /* three more output rows; NOTE(review): this lands on the next 4-row band only if BpL == 8*SrcPPL - confirm with the caller */
+    pOut+=BpL;
+    pOut+=BpL;
+  }
+
+#undef BPP
+#undef BPP2
+#undef BPP3
+#undef Diff
+#undef hq4x_Interp1
+#undef hq4x_Interp2
+#undef hq4x_Interp3
+#undef hq4x_Interp4
+#undef hq4x_Interp5
+#undef hq4x_Interp6
+#undef hq4x_Interp7
+#undef hq4x_Interp8
+}
+
+void hq4x_565(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{ /* hq4x pass over 16-bit RGB565 pixels: visits Xres x Yres source pixels at pIn and moves pOut on by a 4x4-pixel cell for each of them. */
+#define hq4x_Interp1 hq4x_Interp1_565
+#define hq4x_Interp2 hq4x_Interp2_565
+#define hq4x_Interp3 hq4x_Interp3_565
+#define hq4x_Interp4 hq4x_Interp4_565 /* NOTE(review): none of the HQ4X_INTERP generators in this file defines an Interp4 function */
+#define hq4x_Interp5 hq4x_Interp5_565
+#define hq4x_Interp6 hq4x_Interp6_565
+#define hq4x_Interp7 hq4x_Interp7_565
+#define hq4x_Interp8 hq4x_Interp8_565
+#define Diff Diff_565 /* presumably consumed by the included header - confirm there */
+#define BPP   2 /* bytes per output pixel; BPP2/BPP3 are the byte offsets of columns 2 and 3 in the PIXEL macros */
+#define BPP2  4
+#define BPP3  6
+  /* SrcPPL is the source row pitch in pixels (rows are SrcPPL*2 bytes apart); BpL is the output row pitch in bytes. */
+  int  i, j, k;
+  int  prevline, nextline; /* byte offsets from pIn to the row above / below */
+  uint16  w[10]; /* 3x3 neighbourhood, indices 1..9 as in the diagram below; w[0] is unused */
+  uint16  c[10]; /* copy of w[]; the PIXEL macros read their colours from c[] */
+
+  int pattern; /* one bit per neighbour that differs from the centre pixel */
+  int flag; /* bit belonging to the neighbour currently tested */
+
+  int YUV1, YUV2; /* packed YUV of the centre pixel / of the current neighbour */
+
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w1 | w2 | w3 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w4 | w5 | w6 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w7 | w8 | w9 |
+  //   +----+----+----+
+
+  for (j = 0; j < Yres; j++) {
+    if (j>0)      prevline = -SrcPPL*2; else prevline = 0; /* top row: the current row stands in for the missing one */
+    if (j<Yres-1) nextline =  SrcPPL*2; else nextline = 0; /* bottom row: same replacement */
+
+    for (i=0; i<Xres; i++) {
+      w[2] = *((uint16*)(pIn + prevline));
+      w[5] = *((uint16*)pIn);
+      w[8] = *((uint16*)(pIn + nextline));
+
+      if (i>0) {
+        w[1] = *((uint16*)(pIn + prevline - 2));
+        w[4] = *((uint16*)(pIn - 2));
+        w[7] = *((uint16*)(pIn + nextline - 2));
+      } else { /* left edge: repeat the centre column */
+        w[1] = w[2];
+        w[4] = w[5];
+        w[7] = w[8];
+      }
+
+      if (i<Xres-1) {
+        w[3] = *((uint16*)(pIn + prevline + 2));
+        w[6] = *((uint16*)(pIn + 2));
+        w[9] = *((uint16*)(pIn + nextline + 2));
+      } else { /* right edge: repeat the centre column */
+        w[3] = w[2];
+        w[6] = w[5];
+        w[9] = w[8];
+      }
+
+      pattern = 0;
+      flag = 1;
+
+      YUV1 = RGB565toYUV(w[5]);
+      /* Test the 8 neighbours in the order w1,w2,w3,w4,w6,w7,w8,w9; they map to pattern bits 0..7. */
+      for (k=1; k<=9; k++) {
+        if (k==5) continue;
+
+        if ( w[k] != w[5] ) { /* equal raw pixels give equal YUV, so only unequal ones are converted */
+          YUV2 = RGB565toYUV(w[k]);
+          if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+               ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+               ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+            pattern |= flag;
+        }
+        flag <<= 1;
+      }
+
+      for (k=1; k<=9; k++)
+        c[k] = w[k];
+      /* The header below is pasted in as code at this point; its contents are not part of this file. */
+#include "TextureFilters_hq4x.h"
+
+      pIn+=2; /* next source pixel */
+      pOut+=8; /* next output cell: 4 pixels of 2 bytes */
+    }
+    pIn += 2*(SrcPPL-Xres); /* skip the rest of the source row when SrcPPL > Xres */
+    pOut+= 8*(SrcPPL-Xres); /* matching skip on the output side */
+    pOut+=BpL; /* three more output rows; NOTE(review): this lands on the next 4-row band only if BpL == 8*SrcPPL - confirm with the caller */
+    pOut+=BpL;
+    pOut+=BpL;
+  }
+
+#undef BPP
+#undef BPP2
+#undef BPP3
+#undef Diff
+#undef hq4x_Interp1
+#undef hq4x_Interp2
+#undef hq4x_Interp3
+#undef hq4x_Interp4
+#undef hq4x_Interp5
+#undef hq4x_Interp6
+#undef hq4x_Interp7
+#undef hq4x_Interp8
+}
+#endif /* !_16BPP_HACK */
+
+void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{ /* hq4x for 32-bit pixels: every source pixel becomes a 4x4 output block picked from its 3x3 neighbourhood. */
+#define hq4x_Interp1 hq4x_Interp1_8888 /* the generic names below are pointed at their _8888 variants for this function */
+#define hq4x_Interp2 hq4x_Interp2_8888
+#define hq4x_Interp3 hq4x_Interp3_8888
+#define hq4x_Interp4 hq4x_Interp4_8888
+#define hq4x_Interp5 hq4x_Interp5_8888
+#define hq4x_Interp6 hq4x_Interp6_8888
+#define hq4x_Interp7 hq4x_Interp7_8888
+#define hq4x_Interp8 hq4x_Interp8_8888
+#define Diff Diff_888 /* Diff() is what the included pattern switch calls to compare two pixels */
+#define BPP  4 /* size of one 32-bit pixel in bytes; BPP2 and BPP3 are twice and three times that */
+#define BPP2 8
+#define BPP3 12
+  /* pIn/pOut: source/destination bytes; Xres,Yres: source size in pixels; SrcPPL: source pitch in pixels; BpL: presumably bytes per output line (TODO confirm). */
+  int  i, j, k; /* i: source column, j: source row, k: neighbour index 1..9 */
+  int  prevline, nextline; /* byte offsets from pIn to the pixel one row up / one row down */
+  uint32  w[10]; /* the 3x3 neighbourhood in w[1]..w[9] (see diagram); w[0] is never written */
+  uint32  c[10]; /* copy of w[] taken before the switch; presumably read by the PIXEL macros (not visible here) */
+
+  int pattern; /* one bit per neighbour that differs noticeably from the centre w[5] */
+  int flag; /* the bit that belongs to the neighbour currently being tested */
+
+  int YUV1, YUV2; /* RGB888toYUV() results for the centre pixel and for the current neighbour */
+
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w1 | w2 | w3 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w4 | w5 | w6 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w7 | w8 | w9 |
+  //   +----+----+----+
+
+  for (j = 0; j < Yres; j++) {
+    if (j>0)      prevline = -SrcPPL*4; else prevline = 0; /* first row: offset 0 makes the row act as its own upper neighbour */
+    if (j<Yres-1) nextline =  SrcPPL*4; else nextline = 0; /* last row: same trick for the lower neighbour */
+
+    for (i=0; i<Xres; i++) {
+      w[2] = *((uint32*)(pIn + prevline)); /* middle column: above, centre, below */
+      w[5] = *((uint32*)pIn);
+      w[8] = *((uint32*)(pIn + nextline));
+
+      if (i>0) { /* a left neighbour exists: read it, 4 bytes back */
+        w[1] = *((uint32*)(pIn + prevline - 4));
+        w[4] = *((uint32*)(pIn - 4));
+        w[7] = *((uint32*)(pIn + nextline - 4));
+      } else { /* left edge: reuse the middle column instead of reading outside the row */
+        w[1] = w[2];
+        w[4] = w[5];
+        w[7] = w[8];
+      }
+
+      if (i<Xres-1) { /* a right neighbour exists: read it, 4 bytes ahead */
+        w[3] = *((uint32*)(pIn + prevline + 4));
+        w[6] = *((uint32*)(pIn + 4));
+        w[9] = *((uint32*)(pIn + nextline + 4));
+      } else { /* right edge: reuse the middle column */
+        w[3] = w[2];
+        w[6] = w[5];
+        w[9] = w[8];
+      }
+      /* Build the 8-bit pattern: neighbours are visited in order w1..w9 without w5, lowest bit first. */
+      pattern = 0;
+      flag = 1;
+
+      YUV1 = RGB888toYUV(w[5]);
+
+      for (k=1; k<=9; k++) {
+        if (k==5) continue; /* the centre is not a neighbour; skipping here also leaves flag unshifted */
+
+        if ( w[k] != w[5] ) { /* identical pixels cannot exceed any threshold, so the conversion is skipped */
+          YUV2 = RGB888toYUV(w[k]);
+          if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) || /* any one channel over its threshold marks the neighbour as different */
+               ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+               ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+            pattern |= flag;
+        }
+        flag <<= 1;
+      }
+
+      for (k=1; k<=9; k++)
+        c[k] = w[k];
+      /* The header included next is a switch (pattern) built from PIXELxy_nn macros; presumably those write the 4x4 block through pOut. */
+#include "TextureFilters_hq4x.h"
+
+      pIn+=4; /* next source pixel */
+      pOut+=16; /* four 32-bit output pixels further along the output row */
+    }
+
+    pIn += 4*(SrcPPL-Xres); /* step over the part of the source row beyond Xres */
+    pOut+= 16*(SrcPPL-Xres); /* the same step on the output side, four times as wide */
+    pOut+=BpL; /* three further BpL steps; with the advance above this presumably totals four output lines */
+    pOut+=BpL;
+    pOut+=BpL;
+  }
+
+#undef BPP
+#undef BPP2
+#undef BPP3
+#undef Diff
+#undef hq4x_Interp1
+#undef hq4x_Interp2
+#undef hq4x_Interp3
+#undef hq4x_Interp4
+#undef hq4x_Interp5
+#undef hq4x_Interp6
+#undef hq4x_Interp7
+#undef hq4x_Interp8
+}
+
+#if !_16BPP_HACK
+void hq4x_init(void)
+{
+  /* One-time fill of RGB444toYUV: entry (r4 << 8 | g4 << 4 | b4) receives
+   * the packed value (Y << 16 | u << 8 | v) computed for that 12-bit colour. */
+  static int table_ready = 0;
+  int idx;
+
+  if (table_ready)
+    return;
+
+  for (idx = 0; idx < 16 * 16 * 16; ++idx) {
+    /* Split the table index into its red, green and blue nibbles. */
+    const int r4 = idx >> 8;
+    const int g4 = (idx >> 4) & 0xF;
+    const int b4 = idx & 0xF;
+
+    /* Widen each nibble to 8 bits by repeating it (0xA -> 0xAA). */
+    const int red   = (r4 << 4) | r4;
+    const int green = (g4 << 4) | g4;
+    const int blue  = (b4 << 4) | b4;
+
+    /* Shift-only approximation; no weighted RGB->YUV matrix is applied. */
+    const int luma     = (red + green + blue) >> 2;
+    const int chroma_u = 128 + ((red - blue) >> 2);
+    const int chroma_v = 128 + ((-red + 2 * green - blue) >> 3);
+
+    RGB444toYUV[idx] = (luma << 16) | (chroma_u << 8) | chroma_v;
+  }
+  table_ready = 1;
+}
+#endif /* !_16BPP_HACK */
diff --git a/Source/GlideHQ/TextureFilters_hq4x.h b/Source/GlideHQ/TextureFilters_hq4x.h
new file mode 100644
index 000000000..a3a27403c
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_hq4x.h
@@ -0,0 +1,4999 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*  Based on Maxim Stepin and Rice1964 hq4x code */
+
+      switch (pattern)
+      {
+        case 0:
+        case 1:
+        case 4:
+        case 32:
+        case 128:
+        case 5:
+        case 132:
+        case 160:
+        case 33:
+        case 129:
+        case 36:
+        case 133:
+        case 164:
+        case 161:
+        case 37:
+        case 165:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 2:
+        case 34:
+        case 130:
+        case 162:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 16:
+        case 17:
+        case 48:
+        case 49:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 64:
+        case 65:
+        case 68:
+        case 69:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 8:
+        case 12:
+        case 136:
+        case 140:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 3:
+        case 35:
+        case 131:
+        case 163:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 6:
+        case 38:
+        case 134:
+        case 166:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 20:
+        case 21:
+        case 52:
+        case 53:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 144:
+        case 145:
+        case 176:
+        case 177:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 192:
+        case 193:
+        case 196:
+        case 197:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 96:
+        case 97:
+        case 100:
+        case 101:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 40:
+        case 44:
+        case 168:
+        case 172:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 9:
+        case 13:
+        case 137:
+        case 141:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 18:
+        case 50:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL12_0
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 80:
+        case 81:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 72:
+        case 76:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL21_0
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 10:
+        case 138:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+            PIXEL11_0
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 66:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 24:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 7:
+        case 39:
+        case 135:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 148:
+        case 149:
+        case 180:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 224:
+        case 228:
+        case 225:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 41:
+        case 169:
+        case 45:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 22:
+        case 54:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 208:
+        case 209:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 104:
+        case 108:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 11:
+        case 139:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 19:
+        case 51:
+        {
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL00_81
+            PIXEL01_31
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL00_12
+            PIXEL01_14
+            PIXEL02_83
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_21
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 146:
+        case 178:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+            PIXEL23_32
+            PIXEL33_82
+          }
+          else
+          {
+            PIXEL02_21
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_83
+            PIXEL23_13
+            PIXEL33_11
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          break;
+        }
+        case 84:
+        case 85:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL03_81
+            PIXEL13_31
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL03_12
+            PIXEL13_14
+            PIXEL22_70
+            PIXEL23_83
+            PIXEL32_21
+            PIXEL33_50
+          }
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 112:
+        case 113:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL30_82
+            PIXEL31_32
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_70
+            PIXEL23_21
+            PIXEL30_11
+            PIXEL31_13
+            PIXEL32_83
+            PIXEL33_50
+          }
+          break;
+        }
+        case 200:
+        case 204:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+            PIXEL32_31
+            PIXEL33_81
+          }
+          else
+          {
+            PIXEL20_21
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_83
+            PIXEL32_14
+            PIXEL33_12
+          }
+          PIXEL22_31
+          PIXEL23_81
+          break;
+        }
+        case 73:
+        case 77:
+        {
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL00_82
+            PIXEL10_32
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL00_11
+            PIXEL10_13
+            PIXEL20_83
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_21
+          }
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 42:
+        case 170:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+            PIXEL20_31
+            PIXEL30_81
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_21
+            PIXEL10_83
+            PIXEL11_70
+            PIXEL20_14
+            PIXEL30_12
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 14:
+        case 142:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL02_32
+            PIXEL03_82
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_83
+            PIXEL02_13
+            PIXEL03_11
+            PIXEL10_21
+            PIXEL11_70
+          }
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 67:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 70:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 28:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 152:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 194:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 98:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 56:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 25:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 26:
+        case 31:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 82:
+        case 214:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 88:
+        case 248:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 74:
+        case 107:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 27:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 86:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 216:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 106:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 30:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 210:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 120:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 75:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 29:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 198:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 184:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 99:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 57:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 71:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 156:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 226:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 60:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 195:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 102:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 153:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 58:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 83:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_61
+          PIXEL21_30
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 92:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 202:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 78:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 154:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 114:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 89:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 90:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 55:
+        case 23:
+        {
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL00_81
+            PIXEL01_31
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL00_12
+            PIXEL01_14
+            PIXEL02_83
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_21
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 182:
+        case 150:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+            PIXEL23_32
+            PIXEL33_82
+          }
+          else
+          {
+            PIXEL02_21
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_83
+            PIXEL23_13
+            PIXEL33_11
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          break;
+        }
+        case 213:
+        case 212:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL03_81
+            PIXEL13_31
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL03_12
+            PIXEL13_14
+            PIXEL22_70
+            PIXEL23_83
+            PIXEL32_21
+            PIXEL33_50
+          }
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 241:
+        case 240:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL30_82
+            PIXEL31_32
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL22_70
+            PIXEL23_21
+            PIXEL30_11
+            PIXEL31_13
+            PIXEL32_83
+            PIXEL33_50
+          }
+          break;
+        }
+        case 236:
+        case 232:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+            PIXEL32_31
+            PIXEL33_81
+          }
+          else
+          {
+            PIXEL20_21
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_83
+            PIXEL32_14
+            PIXEL33_12
+          }
+          PIXEL22_31
+          PIXEL23_81
+          break;
+        }
+        case 109:
+        case 105:
+        {
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL00_82
+            PIXEL10_32
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL00_11
+            PIXEL10_13
+            PIXEL20_83
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_21
+          }
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 171:
+        case 43:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+            PIXEL11_0
+            PIXEL20_31
+            PIXEL30_81
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_21
+            PIXEL10_83
+            PIXEL11_70
+            PIXEL20_14
+            PIXEL30_12
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 143:
+        case 15:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL02_32
+            PIXEL03_82
+            PIXEL10_0
+            PIXEL11_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_83
+            PIXEL02_13
+            PIXEL03_11
+            PIXEL10_21
+            PIXEL11_70
+          }
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 124:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 203:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 62:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 211:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 118:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 217:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 110:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 155:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 188:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 185:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 61:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 157:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 103:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 227:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 230:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 199:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 220:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 158:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 234:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 242:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 59:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL11_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 121:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 87:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL20_61
+          PIXEL21_30
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 79:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 122:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 94:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 218:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 91:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL11_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 229:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 167:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 173:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 181:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 186:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 115:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 93:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 206:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 205:
+        case 201:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 174:
+        case 46:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 179:
+        case 147:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 117:
+        case 116:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 189:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 231:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 126:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 219:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 125:
+        {
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL00_82
+            PIXEL10_32
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL00_11
+            PIXEL10_13
+            PIXEL20_83
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_21
+          }
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 221:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL03_81
+            PIXEL13_31
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL03_12
+            PIXEL13_14
+            PIXEL22_70
+            PIXEL23_83
+            PIXEL32_21
+            PIXEL33_50
+          }
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 207:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL02_32
+            PIXEL03_82
+            PIXEL10_0
+            PIXEL11_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_83
+            PIXEL02_13
+            PIXEL03_11
+            PIXEL10_21
+            PIXEL11_70
+          }
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 238:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+            PIXEL32_31
+            PIXEL33_81
+          }
+          else
+          {
+            PIXEL20_21
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_83
+            PIXEL32_14
+            PIXEL33_12
+          }
+          PIXEL22_31
+          PIXEL23_81
+          break;
+        }
+        case 190:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+            PIXEL23_32
+            PIXEL33_82
+          }
+          else
+          {
+            PIXEL02_21
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_83
+            PIXEL23_13
+            PIXEL33_11
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          break;
+        }
+        case 187:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+            PIXEL11_0
+            PIXEL20_31
+            PIXEL30_81
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_21
+            PIXEL10_83
+            PIXEL11_70
+            PIXEL20_14
+            PIXEL30_12
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 243:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL30_82
+            PIXEL31_32
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL22_70
+            PIXEL23_21
+            PIXEL30_11
+            PIXEL31_13
+            PIXEL32_83
+            PIXEL33_50
+          }
+          break;
+        }
+        case 119:
+        {
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL00_81
+            PIXEL01_31
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL00_12
+            PIXEL01_14
+            PIXEL02_83
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_21
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 237:
+        case 233:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 175:
+        case 47:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 183:
+        case 151:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 245:
+        case 244:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 250:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 123:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 95:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 222:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 252:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 249:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          break;
+        }
+        case 235:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 111:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 63:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 159:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 215:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 246:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 254:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 253:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 251:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          break;
+        }
+        case 239:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 127:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 191:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 223:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 247:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 255:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+      }
diff --git a/Source/GlideHQ/TextureFilters_lq2x.h b/Source/GlideHQ/TextureFilters_lq2x.h
new file mode 100644
index 000000000..b5318ab81
--- /dev/null
+++ b/Source/GlideHQ/TextureFilters_lq2x.h
@@ -0,0 +1,1307 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+case 0 : 
+case 2 : 
+case 4 : 
+case 6 : 
+case 8 : 
+case 12 : 
+case 16 : 
+case 20 : 
+case 24 : 
+case 28 : 
+case 32 : 
+case 34 : 
+case 36 : 
+case 38 : 
+case 40 : 
+case 44 : 
+case 48 : 
+case 52 : 
+case 56 : 
+case 60 : 
+case 64 : 
+case 66 : 
+case 68 : 
+case 70 : 
+case 96 : 
+case 98 : 
+case 100 : 
+case 102 : 
+case 128 : 
+case 130 : 
+case 132 : 
+case 134 : 
+case 136 : 
+case 140 : 
+case 144 : 
+case 148 : 
+case 152 : 
+case 156 : 
+case 160 : 
+case 162 : 
+case 164 : 
+case 166 : 
+case 168 : 
+case 172 : 
+case 176 : 
+case 180 : 
+case 184 : 
+case 188 : 
+case 192 : 
+case 194 : 
+case 196 : 
+case 198 : 
+case 224 : 
+case 226 : 
+case 228 : 
+case 230 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+} break;
+case 1 : 
+case 5 : 
+case 9 : 
+case 13 : 
+case 17 : 
+case 21 : 
+case 25 : 
+case 29 : 
+case 33 : 
+case 37 : 
+case 41 : 
+case 45 : 
+case 49 : 
+case 53 : 
+case 57 : 
+case 61 : 
+case 65 : 
+case 69 : 
+case 97 : 
+case 101 : 
+case 129 : 
+case 133 : 
+case 137 : 
+case 141 : 
+case 145 : 
+case 149 : 
+case 153 : 
+case 157 : 
+case 161 : 
+case 165 : 
+case 169 : 
+case 173 : 
+case 177 : 
+case 181 : 
+case 185 : 
+case 189 : 
+case 193 : 
+case 197 : 
+case 225 : 
+case 229 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  P3 = IC(1);
+} break;
+case 3 : 
+case 35 : 
+case 67 : 
+case 99 : 
+case 131 : 
+case 163 : 
+case 195 : 
+case 227 : 
+{
+  P0 = IC(2);
+  P1 = IC(2);
+  P2 = IC(2);
+  P3 = IC(2);
+} break;
+case 7 : 
+case 39 : 
+case 71 : 
+case 103 : 
+case 135 : 
+case 167 : 
+case 199 : 
+case 231 : 
+{
+  P0 = IC(3);
+  P1 = IC(3);
+  P2 = IC(3);
+  P3 = IC(3);
+} break;
+case 10 : 
+case 138 : 
+{
+  P1 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I211(0, 1, 3);
+  }
+} break;
+case 11 : 
+case 27 : 
+case 75 : 
+case 139 : 
+case 155 : 
+case 203 : 
+{
+  P1 = IC(2);
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 14 : 
+case 142 : 
+{
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+    P1 = IC(0);
+  } else {
+    P0 = I332(1, 3, 0);
+    P1 = I31(0, 1);
+  }
+} break;
+case 15 : 
+case 143 : 
+case 207 : 
+{
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P1 = IC(4);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I31(4, 1);
+  }
+} break;
+case 18 : 
+case 22 : 
+case 30 : 
+case 50 : 
+case 54 : 
+case 62 : 
+case 86 : 
+case 118 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 19 : 
+case 51 : 
+{
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUR) {
+    P0 = IC(2);
+    P1 = IC(2);
+  } else {
+    P0 = I31(2, 1);
+    P1 = I332(1, 5, 2);
+  }
+} break;
+case 23 : 
+case 55 : 
+case 119 : 
+{
+  P2 = IC(3);
+  P3 = IC(3);
+  if (HQ2X_MUR) {
+    P0 = IC(3);
+    P1 = IC(3);
+  } else {
+    P0 = I31(3, 1);
+    P1 = I332(1, 5, 3);
+  }
+} break;
+case 26 : 
+{
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I211(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 31 : 
+case 95 : 
+{
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 42 : 
+case 170 : 
+{
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+    P2 = IC(0);
+  } else {
+    P0 = I332(1, 3, 0);
+    P2 = I31(0, 3);
+  }
+} break;
+case 43 : 
+case 171 : 
+case 187 : 
+{
+  P1 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+    P2 = IC(2);
+  } else {
+    P0 = I332(1, 3, 2);
+    P2 = I31(2, 3);
+  }
+} break;
+case 46 : 
+case 174 : 
+{
+  P1 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+} break;
+case 47 : 
+case 175 : 
+{
+  P1 = IC(4);
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 58 : 
+case 154 : 
+case 186 : 
+{
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 59 : 
+{
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 63 : 
+{
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 72 : 
+case 76 : 
+case 104 : 
+case 106 : 
+case 108 : 
+case 110 : 
+case 120 : 
+case 124 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+} break;
+case 73 : 
+case 77 : 
+case 105 : 
+case 109 : 
+case 125 : 
+{
+  P1 = IC(1);
+  P3 = IC(1);
+  if (HQ2X_MDL) {
+    P0 = IC(1);
+    P2 = IC(1);
+  } else {
+    P0 = I31(1, 3);
+    P2 = I332(3, 7, 1);
+  }
+} break;
+case 74 : 
+{
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I211(0, 1, 3);
+  }
+} break;
+case 78 : 
+case 202 : 
+case 206 : 
+{
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+} break;
+case 79 : 
+{
+  P1 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 80 : 
+case 208 : 
+case 210 : 
+case 216 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+} break;
+case 81 : 
+case 209 : 
+case 217 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I211(1, 5, 7);
+  }
+} break;
+case 82 : 
+case 214 : 
+case 222 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 83 : 
+case 115 : 
+{
+  P0 = IC(2);
+  P2 = IC(2);
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I611(2, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 84 : 
+case 212 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P1 = IC(0);
+    P3 = IC(0);
+  } else {
+    P1 = I31(0, 5);
+    P3 = I332(5, 7, 0);
+  }
+} break;
+case 85 : 
+case 213 : 
+case 221 : 
+{
+  P0 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P1 = IC(1);
+    P3 = IC(1);
+  } else {
+    P1 = I31(1, 5);
+    P3 = I332(5, 7, 1);
+  }
+} break;
+case 87 : 
+{
+  P0 = IC(3);
+  P2 = IC(3);
+  if (HQ2X_MDR) {
+    P3 = IC(3);
+  } else {
+    P3 = I611(3, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I211(3, 1, 5);
+  }
+} break;
+case 88 : 
+case 248 : 
+case 250 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+} break;
+case 89 : 
+case 93 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I611(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I611(1, 5, 7);
+  }
+} break;
+case 90 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 91 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I611(2, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I611(2, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 92 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+} break;
+case 94 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 107 : 
+case 123 : 
+{
+  P1 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I211(2, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 111 : 
+{
+  P1 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 112 : 
+case 240 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDR) {
+    P2 = IC(0);
+    P3 = IC(0);
+  } else {
+    P2 = I31(0, 7);
+    P3 = I332(5, 7, 0);
+  }
+} break;
+case 113 : 
+case 241 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDR) {
+    P2 = IC(1);
+    P3 = IC(1);
+  } else {
+    P2 = I31(1, 7);
+    P3 = I332(5, 7, 1);
+  }
+} break;
+case 114 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 116 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+} break;
+case 117 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I611(1, 5, 7);
+  }
+} break;
+case 121 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I211(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I611(1, 5, 7);
+  }
+} break;
+case 122 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 126 : 
+{
+  P0 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 127 : 
+{
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 146 : 
+case 150 : 
+case 178 : 
+case 182 : 
+case 190 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+    P3 = IC(0);
+  } else {
+    P1 = I332(1, 5, 0);
+    P3 = I31(0, 5);
+  }
+} break;
+case 147 : 
+case 179 : 
+{
+  P0 = IC(2);
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 151 : 
+case 183 : 
+{
+  P0 = IC(3);
+  P2 = IC(3);
+  P3 = IC(3);
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I1411(3, 1, 5);
+  }
+} break;
+case 158 : 
+{
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 159 : 
+{
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 191 : 
+{
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 200 : 
+case 204 : 
+case 232 : 
+case 236 : 
+case 238 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+    P3 = IC(0);
+  } else {
+    P2 = I332(3, 7, 0);
+    P3 = I31(0, 7);
+  }
+} break;
+case 201 : 
+case 205 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  P3 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I611(1, 3, 7);
+  }
+} break;
+case 211 : 
+{
+  P0 = IC(2);
+  P1 = IC(2);
+  P2 = IC(2);
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I211(2, 5, 7);
+  }
+} break;
+case 215 : 
+{
+  P0 = IC(3);
+  P2 = IC(3);
+  if (HQ2X_MDR) {
+    P3 = IC(3);
+  } else {
+    P3 = I211(3, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I1411(3, 1, 5);
+  }
+} break;
+case 218 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 219 : 
+{
+  P1 = IC(2);
+  P2 = IC(2);
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I211(2, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 220 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+} break;
+case 223 : 
+{
+  P2 = IC(4);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 233 : 
+case 237 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  P3 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I1411(1, 3, 7);
+  }
+} break;
+case 234 : 
+{
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+} break;
+case 235 : 
+{
+  P1 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I1411(2, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 239 : 
+{
+  P1 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 242 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 243 : 
+{
+  P0 = IC(2);
+  P1 = IC(2);
+  if (HQ2X_MDR) {
+    P2 = IC(2);
+    P3 = IC(2);
+  } else {
+    P2 = I31(2, 7);
+    P3 = I332(5, 7, 2);
+  }
+} break;
+case 244 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+} break;
+case 245 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I1411(1, 5, 7);
+  }
+} break;
+case 246 : 
+{
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 247 : 
+{
+  P0 = IC(3);
+  P2 = IC(3);
+  if (HQ2X_MDR) {
+    P3 = IC(3);
+  } else {
+    P3 = I1411(3, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I1411(3, 1, 5);
+  }
+} break;
+case 249 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I1411(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I211(1, 5, 7);
+  }
+} break;
+case 251 : 
+{
+  P1 = IC(2);
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I1411(2, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I211(2, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 252 : 
+{
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+} break;
+case 253 : 
+{
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I1411(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I1411(1, 5, 7);
+  }
+} break;
+case 254 : 
+{
+  P0 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 255 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
diff --git a/Source/GlideHQ/TxCache.cpp b/Source/GlideHQ/TxCache.cpp
new file mode 100644
index 000000000..42f434551
--- /dev/null
+++ b/Source/GlideHQ/TxCache.cpp
@@ -0,0 +1,433 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef WIN32
+#pragma warning(disable: 4786)
+#endif
+
+#include "TxCache.h"
+#include "TxDbg.h"
+#include <zlib/zlib.h>
+#include <Common/path.h>
+
+TxCache::~TxCache()
+{
+  /* drop every cached texture first; clear() also empties the usage list
+   * and resets the running byte total */
+  this->clear();
+  delete this->_txUtil; /* helper object allocated in the constructor */
+}
+
+TxCache::TxCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+                 dispInfoFuncExt callback)
+{
+  /* options: option bit flags; cachesize: byte limit (<= 0 disables eviction
+   * in add()); path/ident may be NULL; callback is used by load(). */
+  _txUtil = new TxUtil();
+  _options = options;
+  _cacheSize = cachesize;
+  _callback = callback;
+  _totalSize = 0;
+  /* Always start without zlib scratch buffers so that get() never reads
+   * indeterminate pointers when the GZ options are off. */
+  _gzdest0 = NULL;
+  _gzdest1 = NULL;
+  _gzdestLen = 0;
+
+  if (path) _path.assign(path);    /* save path name */
+  if (ident) _ident.assign(ident); /* save ROM name */
+
+  /* zlib memory buffers to (de)compress hires textures */
+  if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) {
+    _gzdest0   = TxMemBuf::getInstance()->get(0);
+    _gzdest1   = TxMemBuf::getInstance()->get(1);
+    _gzdestLen = (TxMemBuf::getInstance()->size_of(0) < TxMemBuf::getInstance()->size_of(1)) ?
+                  TxMemBuf::getInstance()->size_of(0) : TxMemBuf::getInstance()->size_of(1);
+    /* compression needs both buffers: otherwise switch the GZ options off */
+    if (!_gzdest0 || !_gzdest1 || !_gzdestLen) {
+      _options &= ~(GZ_TEXCACHE|GZ_HIRESTEXCACHE);
+      _gzdest0 = _gzdest1 = NULL;
+      _gzdestLen = 0;
+    }
+  }
+}
+
+boolean
+TxCache::add(uint64 checksum, GHQTexInfo *info, int dataSize)
+{
+  /* NOTE: dataSize must be provided if info->data is zlib compressed. */
+  /* Stores a private copy of info->data. Returns 1 on success, 0 on bad input, an already cached checksum or failed malloc. */
+  if (!checksum || !info || !info->data || _cache.find(checksum) != _cache.end()) return 0;
+
+  uint8 *dest = info->data;
+  uint16 format = info->format;
+
+  if (!dataSize) {
+    dataSize = _txUtil->sizeofTx(info->width, info->height, info->format);
+
+    if (!dataSize) return 0;
+
+    if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) {
+      /* zlib compress it. compression level:1 (best speed) */
+      uint32 destLen = _gzdestLen;
+      dest = (dest == _gzdest0) ? _gzdest1 : _gzdest0;
+      if (compress2(dest, &destLen, info->data, dataSize, 1) != Z_OK) {
+        dest = info->data;
+        DBG_INFO(80, L"Error: zlib compression failed!\n");
+      } else {
+        DBG_INFO(80, L"zlib compressed: %.02fkb->%.02fkb\n", (float)dataSize/1000, (float)destLen/1000);
+        dataSize = destLen;
+        format |= GR_TEXFMT_GZ;
+      }
+    }
+  }
+
+  /* if cache size exceeds limit, remove old cache */
+  if (_cacheSize > 0) {
+    _totalSize += dataSize;
+    if ((_totalSize > _cacheSize) && !_cachelist.empty()) {
+      /* _cachelist is arranged so that frequently used textures are in the back */
+      std::list<uint64>::iterator itList = _cachelist.begin();
+      while (itList != _cachelist.end()) {
+        /* find it in _cache */
+        std::map<uint64, TXCACHE*>::iterator itMap = _cache.find(*itList);
+        if (itMap != _cache.end()) {
+          /* yep we have it. remove it. */
+          _totalSize -= (*itMap).second->size;
+          free((*itMap).second->info.data);
+          delete (*itMap).second;
+          _cache.erase(itMap);
+        }
+        itList++;
+
+        /* check if memory cache has enough space */
+        if (_totalSize <= _cacheSize)
+          break;
+      }
+      /* remove from _cachelist */
+      _cachelist.erase(_cachelist.begin(), itList);
+
+      DBG_INFO(80, L"+++++++++\n");
+    }
+    _totalSize -= dataSize;
+  }
+
+  /* cache it */
+  uint8 *tmpdata = (uint8*)malloc(dataSize);
+  if (tmpdata) {
+    TXCACHE *txCache = new TXCACHE;
+    if (txCache) {
+      /* we can directly write as we filter, but for now we get away
+       * with doing memcpy after all the filtering is done.
+       */
+      memcpy(tmpdata, dest, dataSize);
+
+      /* copy it */
+      memcpy(&txCache->info, info, sizeof(GHQTexInfo));
+      txCache->info.data = tmpdata;
+      txCache->info.format = format;
+      txCache->size = dataSize;
+
+      /* add to cache */
+      if (_cacheSize > 0) {
+        _cachelist.push_back(checksum);
+        txCache->it = --(_cachelist.end());
+      }
+      /* insert() cannot collide here: duplicates were rejected on entry */
+      _cache.insert(std::map<uint64, TXCACHE*>::value_type(checksum, txCache));
+
+#ifdef DEBUG
+      DBG_INFO(80, L"[%5d] added!! crc:%08X %08X %d x %d gfmt:%x total:%.02fmb\n",
+               _cache.size(), (uint32)(checksum >> 32), (uint32)(checksum & 0xffffffff),
+               info->width, info->height, info->format, (float)_totalSize/1000000);
+
+      DBG_INFO(80, L"smalllodlog2:%d largelodlog2:%d aspectratiolog2:%d\n",
+               txCache->info.smallLodLog2, txCache->info.largeLodLog2, txCache->info.aspectRatioLog2);
+
+      if (info->tiles) {
+        DBG_INFO(80, L"tiles:%d un-tiled size:%d x %d\n", info->tiles, info->untiled_width, info->untiled_height);
+      }
+
+      if (_cacheSize > 0) {
+        DBG_INFO(80, L"cache max config:%.02fmb\n", (float)_cacheSize/1000000);
+
+        if (_cache.size() != _cachelist.size()) {
+          DBG_INFO(80, L"Error: cache/cachelist mismatch! (%d/%d)\n", _cache.size(), _cachelist.size());
+        }
+      }
+#endif
+
+      /* total cache size */
+      _totalSize += dataSize;
+
+      return 1;
+    }
+    free(tmpdata);
+  }
+
+  return 0;
+}
+
+boolean
+TxCache::get(uint64 checksum, GHQTexInfo *info)
+{
+  /* Look up `checksum`; on a hit copy the stored descriptor into *info,
+   * move the entry to the back of the usage list and inflate zlib-packed
+   * data into a scratch buffer. Returns 1 on success, 0 otherwise. */
+  if (!checksum || _cache.empty()) return 0;
+
+  std::map<uint64, TXCACHE*>::iterator hit = _cache.find(checksum);
+  if (hit == _cache.end()) return 0; /* not cached */
+
+  TXCACHE *entry = hit->second;
+  memcpy(info, &entry->info, sizeof(GHQTexInfo));
+
+  /* usage bookkeeping: re-append the checksum at the back of the list */
+  if (_cacheSize > 0) {
+    _cachelist.erase(entry->it);
+    _cachelist.push_back(checksum);
+    entry->it = --(_cachelist.end());
+  }
+
+  /* entries without the GZ flag are handed out exactly as stored */
+  if (!(info->format & GR_TEXFMT_GZ)) return 1;
+
+  uint32 destLen = _gzdestLen;
+  uint8 *dest = (_gzdest0 == info->data) ? _gzdest1 : _gzdest0;
+  if (uncompress(dest, &destLen, info->data, entry->size) != Z_OK) {
+    DBG_INFO(80, L"Error: zlib decompression failed!\n");
+    return 0;
+  }
+  info->data = dest;
+  info->format &= ~GR_TEXFMT_GZ;
+  DBG_INFO(80, L"zlib decompressed: %.02fkb->%.02fkb\n", (float)(entry->size)/1000, (float)destLen/1000);
+
+  return 1;
+}
+
+boolean
+TxCache::save(const wchar_t *path, const wchar_t *filename, int config)
+{
+  if (!_cache.empty()) {
+    /* dump cache to disk: creates `path`, switches into it and (only when built with `tofix`) writes `filename` */
+    char cbuf[MAX_PATH];
+    /* NOTE(review): the return value below is non-zero when the cache is EMPTY - looks inverted relative to load(); confirm with callers */
+	CPath cachepath(stdstr().FromUTF16(path),"");
+	cachepath.CreateDirectory();
+
+    /* Ugly hack to enable fopen/gzopen in Win9x */
+#ifdef WIN32
+    wchar_t curpath[MAX_PATH];
+    GETCWD(MAX_PATH, curpath);
+    cachepath.ChangeDirectory();
+#else
+    char curpath[MAX_PATH];
+    wcstombs(cbuf, cachepath.string().c_str(), MAX_PATH);
+    GETCWD(MAX_PATH, curpath);
+    CHDIR(cbuf);
+#endif
+
+#ifdef tofix
+    wcstombs(cbuf, filename, MAX_PATH);
+
+    gzFile gzfp = gzopen(cbuf, "wb1");
+    DBG_INFO(80, L"gzfp:%x file:%ls\n", gzfp, filename);
+    if (gzfp) {
+      /* write header to determine config match */
+      gzwrite(gzfp, &config, 4);
+
+      std::map<uint64, TXCACHE*>::iterator itMap = _cache.begin();
+      while (itMap != _cache.end()) {
+        uint8 *dest    = (*itMap).second->info.data;
+        uint32 destLen = (*itMap).second->size;
+        uint16 format  = (*itMap).second->info.format;
+
+        /* to keep things simple, we save the texture data in a zlib uncompressed state. */
+        /* sigh... for those who cannot wait the extra few seconds. changed to keep
+         * texture data in a zlib compressed state. if the GZ_TEXCACHE or GZ_HIRESTEXCACHE
+         * option is toggled, the cache will need to be rebuilt.
+         */
+        /*if (format & GR_TEXFMT_GZ) {
+          dest = _gzdest0;
+          destLen = _gzdestLen;
+          if (dest && destLen) {
+            if (uncompress(dest, &destLen, (*itMap).second->info.data, (*itMap).second->size) != Z_OK) {
+              dest = NULL;
+              destLen = 0;
+            }
+            format &= ~GR_TEXFMT_GZ;
+          }
+        }*/
+
+        if (dest && destLen) {
+          /* texture checksum */
+          gzwrite(gzfp, &((*itMap).first), 8);
+
+          /* other texture info */
+          gzwrite(gzfp, &((*itMap).second->info.width), 4);
+          gzwrite(gzfp, &((*itMap).second->info.height), 4);
+          gzwrite(gzfp, &format, 2);
+
+          gzwrite(gzfp, &((*itMap).second->info.smallLodLog2), 4);
+          gzwrite(gzfp, &((*itMap).second->info.largeLodLog2), 4);
+          gzwrite(gzfp, &((*itMap).second->info.aspectRatioLog2), 4);
+
+          gzwrite(gzfp, &((*itMap).second->info.tiles), 4);
+          gzwrite(gzfp, &((*itMap).second->info.untiled_width), 4);
+          gzwrite(gzfp, &((*itMap).second->info.untiled_height), 4);
+
+          gzwrite(gzfp, &((*itMap).second->info.is_hires_tex), 1);
+
+          gzwrite(gzfp, &destLen, 4);
+          gzwrite(gzfp, dest, destLen);
+        }
+
+        itMap++;
+
+        /* not ready yet */
+        /*if (_callback)
+          (*_callback)(L"Total textures saved to HDD: %d\n", std::distance(itMap, _cache.begin()));*/
+      }
+      gzclose(gzfp);
+    }
+#endif /* tofix */
+    /* always restore the caller's working directory; this used to sit inside the disabled `tofix` region, so the cwd stayed changed */
+    CHDIR(curpath);
+  }
+  return _cache.empty();
+}
+
+boolean
+TxCache::load(const wchar_t *path, const wchar_t *filename, int config)
+{
+  /* Read the gzip'ed dump <path>/<filename> and add() each record to the
+   * memory cache; a dump whose 4-byte header differs from `config` is
+   * ignored. Returns non-zero when the cache is non-empty afterwards. */
+  CPath cbuf(stdstr().FromUTF16(path).c_str(),stdstr().FromUTF16(filename).c_str());
+
+  gzFile gzfp = gzopen(cbuf, "rb");
+  DBG_INFO(80, L"gzfp:%x file:%ls\n", gzfp, filename);
+  if (gzfp) {
+    int dataSize = 0;
+    uint64 checksum = 0;
+    GHQTexInfo tmpInfo;
+    int tmpconfig = 0;
+
+    /* read header to determine config match */
+    if (gzread(gzfp, &tmpconfig, 4) == 4 && tmpconfig == config) {
+      for (;;) {
+        memset(&tmpInfo, 0, sizeof(GHQTexInfo));
+
+        /* Stop at the first short read. gzeof() only turns true after a read
+         * came up short, so testing it alone could replay a stale record. */
+        if (gzread(gzfp, &checksum, 8) != 8 ||
+            gzread(gzfp, &tmpInfo.width, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.height, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.format, 2) != 2 ||
+            gzread(gzfp, &tmpInfo.smallLodLog2, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.largeLodLog2, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.aspectRatioLog2, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.tiles, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.untiled_width, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.untiled_height, 4) != 4 ||
+            gzread(gzfp, &tmpInfo.is_hires_tex, 1) != 1 ||
+            gzread(gzfp, &dataSize, 4) != 4 ||
+            dataSize <= 0)
+          break;
+
+        tmpInfo.data = (uint8*)malloc(dataSize);
+        if (tmpInfo.data) {
+          /* add to memory cache, but only when the whole payload was read */
+          if (gzread(gzfp, tmpInfo.data, dataSize) == dataSize)
+            add(checksum, &tmpInfo, (tmpInfo.format & GR_TEXFMT_GZ) ? dataSize : 0);
+
+          free(tmpInfo.data);
+        } else {
+          gzseek(gzfp, dataSize, SEEK_CUR);
+        }
+
+        /* skip in between to prevent the loop from being tied down to vsync */
+        if (_callback && !(_cache.size() % 100))
+          (*_callback)(L"[%d] total mem:%.02fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, filename);
+      }
+      /* final progress report once the dump is exhausted */
+      if (_callback)
+        (*_callback)(L"[%d] total mem:%.02fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, filename);
+    }
+    gzclose(gzfp); /* also on a config mismatch, which used to leak the handle */
+  }
+
+  return !_cache.empty();
+}
+
+boolean
+TxCache::del(uint64 checksum)
+{
+  /* Remove the entry for `checksum`, releasing its pixel data and its slot
+   * in the usage list. Returns 1 if an entry was removed, 0 if none existed. */
+  if (!checksum || _cache.empty()) return 0;
+
+  std::map<uint64, TXCACHE*>::iterator itMap = _cache.find(checksum);
+  if (itMap == _cache.end()) return 0;
+
+  TXCACHE *entry = itMap->second;
+  /* for texture cache (not hi-res cache): the list is only filled when a size limit is set */
+  if (!_cachelist.empty()) _cachelist.erase(entry->it);
+
+  /* remove from cache */
+  _totalSize -= entry->size;
+  free(entry->info.data);
+  delete entry;
+  _cache.erase(itMap);
+
+  /* high word first, matching the "crc:" lines logged by add() */
+  DBG_INFO(80, L"removed from cache: checksum = %08X %08X\n", (uint32)(checksum >> 32), (uint32)(checksum & 0xffffffff));
+  return 1;
+}
+
+boolean
+TxCache::is_cached(uint64 checksum)
+{
+  /* 1 when an entry with this checksum is stored, 0 otherwise;
+   * unlike get() this does not touch the usage list. */
+  const bool found = (_cache.find(checksum) != _cache.end());
+  return found ? 1 : 0;
+}
+
+void
+TxCache::clear()
+{
+  /* Release every cached texture together with its bookkeeping record,
+   * then forget the usage order and reset the running byte total. */
+  typedef std::map<uint64, TXCACHE*>::iterator cache_iter;
+  for (cache_iter cur = _cache.begin(); cur != _cache.end(); ++cur) {
+    TXCACHE *entry = cur->second;
+    free(entry->info.data); /* pixel copy made with malloc() in add() */
+    delete entry;
+  }
+
+  _cache.clear();
+  _cachelist.clear();
+
+  _totalSize = 0;
+}
diff --git a/Source/GlideHQ/TxCache.h b/Source/GlideHQ/TxCache.h
new file mode 100644
index 000000000..0b31b5443
--- /dev/null
+++ b/Source/GlideHQ/TxCache.h
@@ -0,0 +1,69 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXCACHE_H__
+#define __TXCACHE_H__
+
+#include "TxInternal.h"
+#include "TxUtil.h"
+#include <list>
+#include <map>
+#include <string>
+
+class TxCache /* in-memory texture cache keyed by a 64-bit checksum */
+{
+private:
+  std::list<uint64> _cachelist; /* checksums in usage order; add()/get() append at the back, eviction starts at the front */
+  uint8 *_gzdest0; /* zlib scratch buffers, obtained from TxMemBuf */
+  uint8 *_gzdest1; /* only when a GZ_* option is active */
+  uint32 _gzdestLen; /* smaller of the two scratch buffer sizes */
+protected:
+  int _options; /* option bit flags (GZ_TEXCACHE, GZ_HIRESTEXCACHE, ...) */
+  std::wstring _ident; /* ROM name */
+  std::wstring _path; /* path name */
+  dispInfoFuncExt _callback; /* progress callback invoked by load() */
+  TxUtil *_txUtil; /* owned; created in the constructor, deleted in the destructor */
+  struct TXCACHE { /* one cached texture */
+    int size; /* byte size of info.data as stored (zlib compressed if info.format has GR_TEXFMT_GZ) */
+    GHQTexInfo info; /* descriptor; info.data is a private malloc()ed copy */
+    std::list<uint64>::iterator it; /* position in _cachelist; only set when _cacheSize > 0 */
+  };
+  int _totalSize; /* bytes currently held by all entries */
+  int _cacheSize; /* byte limit; <= 0 disables eviction and the usage list */
+  std::map<uint64, TXCACHE*> _cache; /* checksum -> owned entry */
+  boolean save(const wchar_t *path, const wchar_t *filename, const int config); /* dump to disk; returns _cache.empty() */
+  boolean load(const wchar_t *path, const wchar_t *filename, const int config); /* read a dump; returns !_cache.empty() */
+  boolean del(uint64 checksum); /* remove one entry. checksum hi:palette low:texture */
+  boolean is_cached(uint64 checksum); /* lookup without side effects. checksum hi:palette low:texture */
+  void clear(); /* free all entries and reset _totalSize */
+public:
+  ~TxCache();
+  TxCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+              dispInfoFuncExt callback);
+  boolean add(uint64 checksum, /* checksum hi:palette low:texture */
+              GHQTexInfo *info, int dataSize = 0); /* dataSize must be given when info->data is already zlib compressed */
+  boolean get(uint64 checksum, /* checksum hi:palette low:texture */
+              GHQTexInfo *info); /* filled on a hit; info->data then points into the cache or a scratch buffer */
+};
+
+#endif /* __TXCACHE_H__ */
diff --git a/Source/GlideHQ/TxDbg.cpp b/Source/GlideHQ/TxDbg.cpp
new file mode 100644
index 000000000..f648d11f4
--- /dev/null
+++ b/Source/GlideHQ/TxDbg.cpp
@@ -0,0 +1,69 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DBG_LEVEL 80
+
+#include "TxDbg.h"
+#include <string.h>
+#include <stdarg.h>
+#include <Common/std string.h>
+#include <Common/path.h>
+
+TxDbg::TxDbg()
+{
+  CPath Dir(CPath::MODULE_DIRECTORY,""); /* log location: MODULE_DIRECTORY with "Logs" appended */
+  Dir.AppendDirectory("Logs");
+  _level = DBG_LEVEL;
+  if (!_dbgfile) { /* open the log only when no stream is set yet */
+#ifdef GHQCHK
+    _dbgfile = fopen(CPath(Dir,"ghqchk.txt"), "w");
+#else
+    _dbgfile = fopen(CPath((LPCSTR)Dir,"glidehq.dbg"), "w");
+#endif
+  }
+}
+
+TxDbg::~TxDbg()
+{
+  /* close the log stream if one is open */
+  if (_dbgfile != 0) {
+    fclose(_dbgfile);
+    _dbgfile = 0;
+  }
+  _level = DBG_LEVEL; /* back to the compile-time default level */
+}
+
+void
+TxDbg::output(const int level, const wchar_t *format, ...)
+{
+	/* Log "<level>:\t<message>"; dropped when level exceeds _level or when no
+	 * log file is open (a failed fopen() used to crash inside vfwprintf()). */
+	if (level > _level || !_dbgfile || !format)
+		return;
+	stdstr_f newformat("%d:\t%s",level,stdstr().FromUTF16(format).c_str());
+	va_list args;
+	va_start(args, format);
+	vfwprintf(_dbgfile, newformat.ToUTF16().c_str(), args);
+	va_end(args);
+	fflush(_dbgfile);
+}
diff --git a/Source/GlideHQ/TxDbg.h b/Source/GlideHQ/TxDbg.h
new file mode 100644
index 000000000..f22b87e87
--- /dev/null
+++ b/Source/GlideHQ/TxDbg.h
@@ -0,0 +1,61 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXDBG_H__
+#define __TXDBG_H__
+
+#include <stdio.h>
+#include "TxInternal.h"
+
+class TxDbg /* debug logger writing to a single log file through one shared instance */
+{
+private:
+  FILE* _dbgfile; /* log stream opened by the constructor; stays 0 if fopen() failed */
+  int _level; /* output() drops messages whose level is above this */
+  TxDbg(); /* private: the only instance is the one created by getInstance() */
+public:
+  static TxDbg* getInstance() {
+    static TxDbg txDbg; /* function-local static, constructed on the first call */
+    return &txDbg;
+  }
+  ~TxDbg();
+  void output(const int level, const wchar_t *format, ...); /* printf-style with a wide-character format */
+};
+
+#ifdef DEBUG /* debug builds: both macros call the logger */
+#define DBG_INFO TxDbg::getInstance()->output
+#define INFO DBG_INFO
+#else /* otherwise DBG_INFO(args) expands to `0 && (wchar_t)(args)`: a short-circuited comma expression that is never evaluated */
+#define DBG_INFO 0 && (wchar_t)
+#ifdef GHQCHK /* with GHQCHK defined, INFO still logs in non-debug builds */
+#define INFO TxDbg::getInstance()->output
+#else
+#if 0 /* XXX enable this to log basic hires texture checks */
+#define INFO TxDbg::getInstance()->output
+#else
+#define INFO DBG_INFO
+#endif
+#endif /* GHQCHK */
+#endif /* DEBUG */
+
+#endif /* __TXDBG_H__ */
diff --git a/Source/GlideHQ/TxFilter.cpp b/Source/GlideHQ/TxFilter.cpp
new file mode 100644
index 000000000..39b0a3296
--- /dev/null
+++ b/Source/GlideHQ/TxFilter.cpp
@@ -0,0 +1,692 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef WIN32
+#pragma warning(disable: 4786)
+#endif
+
+#include <common/path.h>
+#include "TxFilter.h"
+#include "TextureFilters.h"
+#include "TxDbg.h"
+#include "bldno.h"
+
+void TxFilter::clear()
+{
+  /* Tear down in the original order: caches first, then the shared TxMemBuf
+   * buffers, then the helper objects. Each owned pointer is reset to NULL so
+   * that a repeated clear() cannot delete the same object twice. */
+  delete _txHiResCache; _txHiResCache = NULL; /* hires texture cache */
+  delete _txTexCache;   _txTexCache   = NULL; /* texture cache */
+
+  /* free memory */
+  TxMemBuf::getInstance()->shutdown();
+
+  /* clear other stuff */
+  delete _txImage;    _txImage    = NULL;
+  delete _txQuantize; _txQuantize = NULL;
+  delete _txUtil;     _txUtil     = NULL;
+}
+
+TxFilter::~TxFilter()
+{
+  this->clear(); /* all teardown lives in clear() */
+}
+
+TxFilter::TxFilter(int maxwidth, int maxheight, int maxbpp, int options,
+                   int cachesize, wchar_t *path, wchar_t *ident,
+				   dispInfoFuncExt callback) :
+	_numcore(0),
+	_tex1(NULL),
+	_tex2(NULL),
+	_maxwidth(0),
+	_maxheight(0),
+	_maxbpp(0),
+	_options(0),
+	_cacheSize(0),
+	_txQuantize(NULL),
+	_txTexCache(NULL),
+	_txHiResCache(NULL),
+	_txUtil(NULL),
+	_txImage(NULL),
+	_initialized(false)
+{
+  /* HACKALERT: the emulator misbehaves and sometimes forgets to shutdown */
+  if ((ident && wcscmp(ident, L"DEFAULT") != 0 && _ident.compare(ident) == 0) &&
+      _maxwidth  == maxwidth  &&
+      _maxheight == maxheight &&
+      _maxbpp    == maxbpp    &&
+      _options   == options   &&
+      _cacheSize == cachesize) return; /* NOTE(review): every member compared here was just zeroed by the initialiser list, so this only matches all-zero arguments with an empty ident */
+  clear(); /* gcc does not allow the destructor to be called */
+
+  /* this first INFO call instantiates the TxDbg singleton when INFO is
+   * mapped to the logger; its constructor opens the log file. */
+  INFO(0, L"------------------------------------------------------------------\n");
+#ifdef GHQCHK
+  INFO(0, L" GlideHQ Hires Texture Checker 1.02.00.%d\n", BUILD_NUMBER);
+#else
+//  INFO(0, L" GlideHQ version 1.02.00.%d\n", BUILD_NUMBER);
+#endif
+  INFO(0, L" Copyright (C) 2010  Hiroshi Morii   All Rights Reserved\n");
+  INFO(0, L"    email   : koolsmoky(at)users.sourceforge.net\n");
+  INFO(0, L"    website : http://www.3dfxzone.it/koolsmoky\n");
+  INFO(0, L"\n");
+  INFO(0, L" Glide64 official website : http://glide64.emuxhaven.net\n");
+  INFO(0, L"------------------------------------------------------------------\n");
+
+  _options = options;
+
+  _txImage      = new TxImage();
+  _txQuantize   = new TxQuantize();
+  _txUtil       = new TxUtil();
+
+  /* get number of CPU cores. */
+  _numcore = _txUtil->getNumberofProcessors();
+
+  _initialized = 0; /* set to 1 at the very end, once both work buffers exist */
+
+  _tex1 = NULL;
+  _tex2 = NULL;
+
+  /* XXX: anything larger than 1024 * 1024 is overkill */
+  _maxwidth  = maxwidth  > 1024 ? 1024 : maxwidth;
+  _maxheight = maxheight > 1024 ? 1024 : maxheight;
+  _maxbpp    = maxbpp;
+
+  _cacheSize = cachesize;
+
+  /* TODO: validate options and do overrides here*/
+
+  /* save path name */
+  if (path)
+    _path.assign(path);
+
+  /* save ROM name */
+  if (ident && wcscmp(ident, L"DEFAULT") != 0)
+    _ident.assign(ident);
+
+  /* check for dxtn extensions */
+  if (!TxLoadLib::getInstance()->getdxtCompressTexFuncExt())
+    _options &= ~S3TC_COMPRESSION;
+
+  if (!TxLoadLib::getInstance()->getfxtCompressTexFuncExt())
+    _options &= ~FXT1_COMPRESSION;
+
+  switch (options & COMPRESSION_MASK) { /* NOTE(review): tests the caller's `options`, not `_options`, which may already have had bits cleared above - confirm intended */
+  case FXT1_COMPRESSION:
+  case S3TC_COMPRESSION:
+    break;
+  case NCC_COMPRESSION:
+  default:
+    _options &= ~COMPRESSION_MASK; /* any other compression selection is switched off */
+  }
+
+  if (TxMemBuf::getInstance()->init(_maxwidth, _maxheight)) {
+    if (!_tex1)
+      _tex1 = TxMemBuf::getInstance()->get(0);
+
+    if (!_tex2)
+      _tex2 = TxMemBuf::getInstance()->get(1);
+  }
+
+#if !_16BPP_HACK
+  /* initialize hq4x filter */
+  hq4x_init();
+#endif
+
+  /* initialize texture cache in bytes. 128Mb will do nicely in most cases */
+  _txTexCache = new TxTexCache(_options, _cacheSize, _path.c_str(), _ident.c_str(), callback);
+
+  /* hires texture */
+#if HIRES_TEXTURE
+  _txHiResCache = new TxHiResCache(_maxwidth, _maxheight, _maxbpp, _options, _path.c_str(), _ident.c_str(), callback);
+
+  if (_txHiResCache->empty())
+    _options &= ~HIRESTEXTURES_MASK; /* no hires textures available: drop the hires option bits */
+#endif
+
+  if (!(_options & COMPRESS_TEX))
+    _options &= ~COMPRESSION_MASK; /* compression type bits are only kept when COMPRESS_TEX is set */
+
+  if (_tex1 && _tex2) /* filter() refuses to run unless both work buffers were obtained */
+      _initialized = 1;
+}
+
+boolean
+TxFilter::filter(uint8 *src, int srcwidth, int srcheight, uint16 srcformat, uint64 g64crc, GHQTexInfo *info)
+{
+  uint8 *texture = src;
+  uint8 *tmptex = _tex1;
+  uint16 destformat = srcformat;
+
+  /* We need to be initialized first! */
+  if (!_initialized) return 0;
+
+  /* find cached textures */
+  if (_cacheSize) {
+
+    /* calculate checksum of source texture */
+    if (!g64crc)
+      g64crc = (uint64)(_txUtil->checksumTx(texture, srcwidth, srcheight, srcformat));
+
+    DBG_INFO(80, L"filter: crc:%08X %08X %d x %d gfmt:%x\n",
+             (uint32)(g64crc >> 32), (uint32)(g64crc & 0xffffffff), srcwidth, srcheight, srcformat);
+
+#if 0 /* use hirestex to retrieve cached textures. */
+    /* check if we have it in cache */
+    if (!(g64crc & 0xffffffff00000000) && /* we reach here only when there is no hires texture for this crc */
+        _txTexCache->get(g64crc, info)) {
+      DBG_INFO(80, L"cache hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+      return 1; /* yep, we've got it */
+    }
+#endif
+  }
+
+  /* Leave small textures alone because filtering makes little difference.
+   * Moreover, some filters require at least 4 * 4 to work.
+   * Bypass _options to do ARGB8888->16bpp if _maxbpp=16 or forced color reduction.
+   */
+  if ((srcwidth >= 4 && srcheight >= 4) &&
+      ((_options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESSION_MASK)) ||
+       (srcformat == GR_TEXFMT_ARGB_8888 && (_maxbpp < 32 || _options & FORCE16BPP_TEX)))) {
+
+#if !_16BPP_HACK
+    /* convert textures to a format that the compressor accepts (ARGB8888) */
+    if (_options & COMPRESSION_MASK) {
+#endif
+      if (srcformat != GR_TEXFMT_ARGB_8888) {
+        if (!_txQuantize->quantize(texture, tmptex, srcwidth, srcheight, srcformat, GR_TEXFMT_ARGB_8888)) {
+          DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", srcformat);
+          return 0;
+        }
+        texture = tmptex;
+        destformat = GR_TEXFMT_ARGB_8888;
+      }
+#if !_16BPP_HACK
+    }
+#endif
+
+    switch (destformat) {
+    case GR_TEXFMT_ARGB_8888:
+
+      /*
+       * prepare texture enhancements (x2, x4 scalers)
+       */
+      int scale_shift = 0, num_filters = 0;
+      uint32 filter = 0;
+
+      if ((_options & ENHANCEMENT_MASK) == HQ4X_ENHANCEMENT) {
+        if (srcwidth  <= (_maxwidth >> 2) && srcheight <= (_maxheight >> 2)) {
+          filter |= HQ4X_ENHANCEMENT;
+          scale_shift = 2;
+          num_filters++;
+        } else if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          filter |= HQ2X_ENHANCEMENT;
+          scale_shift = 1;
+          num_filters++;
+        }
+      } else if (_options & ENHANCEMENT_MASK) {
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          filter |= (_options & ENHANCEMENT_MASK);
+          scale_shift = 1;
+          num_filters++;
+        }
+      }
+
+      /*
+       * prepare texture filters
+       */
+      if (_options & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) {
+        filter |= (_options & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK));
+        num_filters++;
+      }
+
+      /*
+       * execute texture enhancements and filters
+       */
+      while (num_filters > 0) {
+
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+
+        uint8 *_texture = texture;
+        uint8 *_tmptex  = tmptex;
+
+        unsigned int numcore = _numcore;
+        unsigned int blkrow = 0;
+        while (numcore > 1 && blkrow == 0) {
+          blkrow = (srcheight >> 2) / numcore;
+          numcore--;
+        }
+        if (blkrow > 0 && numcore > 1) {
+#ifdef tofix
+		  boost::thread *thrd[MAX_NUMCORE];
+          unsigned int i;
+          int blkheight = blkrow << 2;
+          unsigned int srcStride = (srcwidth * blkheight) << 2;
+          unsigned int destStride = srcStride << scale_shift << scale_shift;
+          for (i = 0; i < numcore - 1; i++) {
+            thrd[i] = new boost::thread(boost::bind(filter_8888,
+                                                    (uint32*)_texture,
+                                                    srcwidth,
+                                                    blkheight,
+                                                    (uint32*)_tmptex,
+                                                    filter));
+            _texture += srcStride;
+            _tmptex  += destStride;
+          }
+          thrd[i] = new boost::thread(boost::bind(filter_8888,
+                                                  (uint32*)_texture,
+                                                  srcwidth,
+                                                  srcheight - blkheight * i,
+                                                  (uint32*)_tmptex,
+                                                  filter));
+          for (i = 0; i < numcore; i++) {
+            thrd[i]->join();
+            delete thrd[i];
+          }
+#endif
+        } else {
+          filter_8888((uint32*)_texture, srcwidth, srcheight, (uint32*)_tmptex, filter);
+        }
+
+        if (filter & ENHANCEMENT_MASK) {
+          srcwidth  <<= scale_shift;
+          srcheight <<= scale_shift;
+          filter &= ~ENHANCEMENT_MASK;
+          scale_shift = 0;
+        }
+
+        texture = tmptex;
+        num_filters--;
+      }
+
+      /*
+       * texture compression
+       */
+      /* ignored if we only have texture compression option on.
+       * only done when texture enhancer is used. see constructor. */
+      if ((_options & COMPRESSION_MASK) &&
+          (srcwidth >= 64 && srcheight >= 64) /* Texture compression is not suitable for low pixel coarse detail
+                                               * textures. The assumption here is that textures larger than 64x64
+                                               * have enough detail to produce decent quality when compressed. The
+                                               * down side is that narrow stripped textures that the N64 often use
+                                               * for large background textures are also ignored. It would be more
+                                               * reasonable if decisions are made based on fourier-transform
+                                               * spectrum or RMS error.
+                                               */
+          ) {
+        int compressionType = _options & COMPRESSION_MASK;
+        int tmpwidth, tmpheight;
+        uint16 tmpformat;
+        /* XXX: textures that use 8bit alpha channel look bad with the current
+         * fxt1 library, so we substitute it with dxtn for now. afaik all gfx
+         * cards that support fxt1 also support dxtn. (3dfx and Intel) */
+        if ((destformat == GR_TEXFMT_ALPHA_INTENSITY_88) ||
+            (destformat == GR_TEXFMT_ARGB_8888) ||
+            (destformat == GR_TEXFMT_ALPHA_8)) {
+          compressionType = S3TC_COMPRESSION;
+        }
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+        if (_txQuantize->compress(texture, tmptex,
+                                  srcwidth, srcheight, srcformat,
+                                  &tmpwidth, &tmpheight, &tmpformat,
+                                  compressionType)) {
+          srcwidth = tmpwidth;
+          srcheight = tmpheight;
+          destformat = tmpformat;
+          texture = tmptex;
+        }
+      }
+
+
+      /*
+       * texture (re)conversions
+       */
+      if (destformat == GR_TEXFMT_ARGB_8888) {
+        if (srcformat == GR_TEXFMT_ARGB_8888 && (_maxbpp < 32 || _options & FORCE16BPP_TEX)) srcformat = GR_TEXFMT_ARGB_4444;
+        if (srcformat != GR_TEXFMT_ARGB_8888) {
+          tmptex = (texture == _tex1) ? _tex2 : _tex1;
+          if (!_txQuantize->quantize(texture, tmptex, srcwidth, srcheight, GR_TEXFMT_ARGB_8888, srcformat)) {
+            DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", srcformat);
+            return 0;
+          }
+          texture = tmptex;
+          destformat = srcformat;
+        }
+      }
+
+      break;
+#if !_16BPP_HACK
+    case GR_TEXFMT_ARGB_4444:
+
+      int scale_shift = 0;
+      tmptex = (texture == _tex1) ? _tex2 : _tex1;
+
+      switch (_options & ENHANCEMENT_MASK) {
+      case HQ4X_ENHANCEMENT:
+        if (srcwidth <= (_maxwidth >> 2) && srcheight <= (_maxheight >> 2)) {
+          hq4x_4444((uint8*)texture, (uint8*)tmptex, srcwidth, srcheight, srcwidth, srcwidth * 4 * 2);
+          scale_shift = 2;
+        }/* else if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          hq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }*/
+        break;
+      case HQ2X_ENHANCEMENT:
+        if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          hq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case HQ2XS_ENHANCEMENT:
+        if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          hq2xS_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case LQ2X_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          lq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case LQ2XS_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          lq2xS_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case X2SAI_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          Super2xSaI_4444((uint16*)texture, (uint16*)tmptex, srcwidth, srcheight, srcwidth);
+          scale_shift = 1;
+        }
+        break;
+      case X2_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          Texture2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+      }
+      if (scale_shift) {
+        srcwidth <<= scale_shift;
+        srcheight <<= scale_shift;
+        texture = tmptex;
+      }
+
+      if (_options & SMOOTH_FILTER_MASK) {
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+        SmoothFilter_4444((uint16*)texture, srcwidth, srcheight, (uint16*)tmptex, (_options & SMOOTH_FILTER_MASK));
+        texture = tmptex;
+      } else if (_options & SHARP_FILTER_MASK) {
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+        SharpFilter_4444((uint16*)texture, srcwidth, srcheight, (uint16*)tmptex, (_options & SHARP_FILTER_MASK));
+        texture = tmptex;
+      }
+
+      break;
+    case GR_TEXFMT_ARGB_1555:
+      break;
+    case GR_TEXFMT_RGB_565:
+      break;
+    case GR_TEXFMT_ALPHA_8:
+      break;
+#endif /* _16BPP_HACK */
+    }
+  }
+
+  /* fill in the texture info. */
+  info->data = texture;
+  info->width  = srcwidth;
+  info->height = srcheight;
+  info->format = destformat;
+  info->smallLodLog2 = _txUtil->grLodLog2(srcwidth, srcheight);
+  info->largeLodLog2 = info->smallLodLog2;
+  info->aspectRatioLog2 = _txUtil->grAspectRatioLog2(srcwidth, srcheight);
+  info->is_hires_tex = 0;
+
+  /* cache the texture. */
+  if (_cacheSize) _txTexCache->add(g64crc, info);
+
+  DBG_INFO(80, L"filtered texture: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+
+  return 1;
+}
+
+/*
+ * Look up a replacement or previously filtered texture.
+ *
+ * Search order: the hires cache keyed by the full r_crc64, then the hires
+ * cache keyed by the low 32 bits of r_crc64 (texture crc only), and finally
+ * the regular texture cache keyed by g64crc. The hires lookups are compiled
+ * in only when HIRES_TEXTURE is set, and are attempted only when a hires
+ * option is enabled and r_crc64 is non-zero. The regular cache is consulted
+ * only when both _cacheSize and g64crc are non-zero.
+ *
+ * A GR_TEXFMT_P_8 hit on the texture-only key is converted with the supplied
+ * palette (and optionally compressed), then stored back into the hires cache
+ * under the full r_crc64.
+ *
+ * Returns 1 with *info filled in on a hit, 0 when nothing was found.
+ */
+boolean
+TxFilter::hirestex(uint64 g64crc, uint64 r_crc64, uint16 *palette, GHQTexInfo *info)
+{
+  /* NOTE: Rice CRC32 sometimes return the same value for different textures.
+   * As a workaround, Glide64 CRC32 is used for the key for NON-hires
+   * texture cache.
+   *
+   * r_crc64 = hi:palette low:texture
+   *           (separate crc. doesn't necessarily have to be rice crc)
+   * g64crc  = texture + palette glide64 crc32
+   *           (can be any other crc if robust)
+   */
+
+  DBG_INFO(80, L"hirestex: r_crc64:%08X %08X, g64crc:%08X %08X\n",
+           (uint32)(r_crc64 >> 32), (uint32)(r_crc64 & 0xffffffff),
+           (uint32)(g64crc >> 32), (uint32)(g64crc & 0xffffffff));
+
+#if HIRES_TEXTURE
+  /* check if we have it in hires memory cache. */
+  if ((_options & HIRESTEXTURES_MASK) && r_crc64) {
+    if (_txHiResCache->get(r_crc64, info)) {
+      DBG_INFO(80, L"hires hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+
+      /* TODO: Enable emulation for special N64 combiner modes. There are few ways
+       * to get this done. Also applies for CI textures below.
+       *
+       * Solution 1. Load the hiresolution textures in ARGB8888 (or A8, IA88) format
+       * to cache. When a cache is hit, then we take the modes passed in from Glide64
+       * (also TODO) and apply the modification. Then we do color reduction or format
+       * conversion or compression if desired and stuff it into the non-hires texture
+       * cache.
+       *
+       * Solution 2. When a cache is hit and if the combiner modes are present,
+       * convert the texture to ARGB4444 and pass it back to Glide64 to process.
+       * If a texture is compressed, it needs to be decompressed first. Then add
+       * the processed texture to the non-hires texture cache.
+       *
+       * Solution 3. Hybrid of the above 2. Load the textures in ARGB8888 (A8, IA88)
+       * format. Convert the texture to ARGB4444 and pass it back to Glide64 when
+       * the combiner modes are present. Get the processed texture back from Glide64
+       * and compress if desired and add it to the non-hires texture cache.
+       *
+       * Solution 4. Take the easy way out and forget about this whole thing.
+       */
+
+      return 1; /* yep, got it */
+    }
+    /* second chance: key on the texture crc alone (palette half dropped) */
+    if (_txHiResCache->get((r_crc64 & 0xffffffff), info)) {
+      DBG_INFO(80, L"hires hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+
+      /* for true CI textures, we use the passed in palette to convert to
+       * ARGB1555 and add it to memory cache.
+       *
+       * NOTE: we do this AFTER all other texture cache searches because
+       * only a few texture packs actually use true CI textures.
+       *
+       * NOTE: the pre-converted palette from Glide64 is in RGBA5551 format.
+       * A comp comes before RGB comp.
+       */
+      if (palette && info->format == GR_TEXFMT_P_8) {
+        DBG_INFO(80, L"found GR_TEXFMT_P_8 format. Need conversion!!\n");
+
+        int width = info->width;
+        int height = info->height;
+        uint16 format = info->format;
+        /* XXX: avoid collision with zlib compression buffer in TxHiResTexture::get */
+        uint8 *texture = info->data;
+        /* every stage below writes into whichever scratch buffer is not its input */
+        uint8 *tmptex = (texture == _tex1) ? _tex2 : _tex1;
+
+        /* use palette and convert to 16bit format */
+        _txQuantize->P8_16BPP((uint32*)texture, (uint32*)tmptex, info->width, info->height, (uint32*)palette);
+        texture = tmptex;
+        format = GR_TEXFMT_ARGB_1555;
+
+#if 1
+        /* XXX: compressed if memory cache compression is ON */
+        if (_options & COMPRESSION_MASK) {
+          /* the compressor is fed ARGB8888, so expand the 16bit data first */
+          tmptex = (texture == _tex1) ? _tex2 : _tex1;
+          if (_txQuantize->quantize(texture, tmptex, info->width, info->height, format, GR_TEXFMT_ARGB_8888)) {
+            texture = tmptex;
+            format = GR_TEXFMT_ARGB_8888;
+          }
+          if (format == GR_TEXFMT_ARGB_8888) {
+            tmptex = (texture == _tex1) ? _tex2 : _tex1;
+            if (_txQuantize->compress(texture, tmptex,
+                                      info->width, info->height, GR_TEXFMT_ARGB_1555,
+                                      &width, &height, &format,
+                                      _options & COMPRESSION_MASK)) {
+              texture = tmptex;
+            } else {
+              /* NOTE(review): tmptex is the buffer P8_16BPP wrote to above, so
+               * falling back to it as ARGB1555 appears to rely on compress()
+               * leaving its destination untouched on failure -- confirm. */
+              /*if (!_txQuantize->quantize(texture, tmptex, info->width, info->height, GR_TEXFMT_ARGB_8888, GR_TEXFMT_ARGB_1555)) {
+                DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", format);
+                return 0;
+              }*/
+              texture = tmptex;
+              format = GR_TEXFMT_ARGB_1555;
+            }
+          }
+        }
+#endif
+
+        /* fill in the required info to return */
+        info->data = texture;
+        info->width = width;
+        info->height = height;
+        info->format = format;
+        info->smallLodLog2 = _txUtil->grLodLog2(width, height);
+        info->largeLodLog2 = info->smallLodLog2;
+        info->aspectRatioLog2 = _txUtil->grAspectRatioLog2(width, height);
+        info->is_hires_tex = 1;
+
+        /* XXX: add to hires texture cache!!! */
+        /* stored under the full 64bit key so the first lookup hits next time */
+        _txHiResCache->add(r_crc64, info);
+
+        DBG_INFO(80, L"GR_TEXFMT_P_8 loaded as gfmt:%x!\n", format);
+      }
+
+      return 1;
+    }
+  }
+#endif
+
+  /* check if we have it in memory cache */
+  if (_cacheSize && g64crc) {
+    if (_txTexCache->get(g64crc, info)) {
+      DBG_INFO(80, L"cache hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+      return 1; /* yep, we've got it */
+    }
+  }
+
+  DBG_INFO(80, L"no cache hits.\n");
+
+  return 0;
+}
+
+/*
+ * Compute the 64bit texture checksum through TxUtil.
+ *
+ * The checksum is only calculated when hires textures or texture dumping
+ * are enabled in the options; otherwise 0 is returned.
+ */
+uint64
+TxFilter::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  const int crc_wanted = _options & (HIRESTEXTURES_MASK|DUMP_TEX);
+
+  if (!crc_wanted) {
+    return 0;
+  }
+
+  return _txUtil->checksum64(src, width, height, size, rowStride, palette);
+}
+
+/*
+ * Dump a texture to disk as a PNG file.
+ *
+ * The texels are first converted to ARGB8888 in the _tex1 scratch buffer.
+ * The output directory is built from _path by appending "texture_dump",
+ * the identifier and "GlideHQ" in turn; each level is created when missing.
+ * The file name follows the Rice naming scheme
+ * (ident#crc#fmt#siz[#palcrc]_suffix.png).
+ *
+ * Returns 1 when the file was opened and written, 0 in every other case
+ * (filter not initialized, dumping disabled, unsupported format, empty
+ * path/ident, directory or file creation failure).
+ */
+boolean
+TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64)
+{
+  if (!_initialized || !(_options & DUMP_TEX))
+    return 0;
+
+  DBG_INFO(80, L"gfmt = %02x n64fmt = %02x\n", gfmt, n64fmt);
+  DBG_INFO(80, L"hirestex: r_crc64:%08X %08X\n",
+           (uint32)(r_crc64 >> 32), (uint32)(r_crc64 & 0xffffffff));
+
+  /* expand to 32bit; the full row stride is converted, not just the visible width */
+  if (!_txQuantize->quantize(src, _tex1, rowStridePixel, height, (gfmt & 0x00ff), GR_TEXFMT_ARGB_8888))
+    return 0;
+
+  /* nowhere to dump to without a base path and an identifier */
+  if (_path.empty() || _ident.empty())
+    return 0;
+
+  CPath dumpfile(stdstr().FromUTF16(_path.c_str()).c_str(),"");
+
+  /* create directories */
+  dumpfile.AppendDirectory("texture_dump");
+  if (!dumpfile.DirectoryExists() && !dumpfile.CreateDirectory())
+    return 0;
+
+  dumpfile.AppendDirectory(stdstr().FromUTF16(_ident.c_str()).c_str());
+  if (!dumpfile.DirectoryExists() && !dumpfile.CreateDirectory())
+    return 0;
+
+  dumpfile.AppendDirectory("GlideHQ");
+  if (!dumpfile.DirectoryExists() && !dumpfile.CreateDirectory())
+    return 0;
+
+  /* pieces of the file name: texture crc, n64 format and size, palette crc */
+  const uint32 texcrc = (uint32)(r_crc64 & 0xffffffff);
+  const uint32 palcrc = (uint32)(r_crc64 >> 32);
+  const int n64format = (n64fmt >> 8);
+  const int n64size = (n64fmt & 0xf);
+
+  if (n64format == 0x2) {
+    /* format 2 additionally carries the palette crc */
+    dumpfile.SetNameExtension(stdstr_f("%ls#%08X#%01X#%01X#%08X_ciByRGBA.png",_ident.c_str(),texcrc,n64format,n64size,palcrc).c_str());
+  } else {
+    dumpfile.SetNameExtension(stdstr_f("%ls#%08X#%01X#%01X_all.png",_ident.c_str(),texcrc,n64format,n64size).c_str());
+  }
+
+  /* dump it to disk */
+  FILE *fp = NULL;
+#ifdef WIN32
+  fp = fopen(dumpfile, "wb");
+#else
+  char cbuf[MAX_PATH];
+  wcstombs(cbuf, dumpfile.c_str(), MAX_PATH);
+  fp = fopen(cbuf, "wb");
+#endif
+  if (fp == NULL)
+    return 0;
+
+  _txImage->writePNG(_tex1, fp, width, height, (rowStridePixel << 2), 0x0003, 0);
+  fclose(fp);
+
+  return 1;
+}
+
+/*
+ * Reload the hires texture pack and bring the hires option bits in line
+ * with the outcome: cleared when the cache ends up empty, set otherwise.
+ *
+ * Returns 1 when the hires cache reported a successful load, else 0.
+ */
+boolean
+TxFilter::reloadhirestex()
+{
+  DBG_INFO(80, L"Reload hires textures from texture pack.\n");
+
+  if (!_txHiResCache->load(0)) {
+    return 0;
+  }
+
+  if (_txHiResCache->empty()) {
+    _options &= ~HIRESTEXTURES_MASK;
+  } else {
+    _options |= HIRESTEXTURES_MASK;
+  }
+
+  return 1;
+}
diff --git a/Source/GlideHQ/TxFilter.h b/Source/GlideHQ/TxFilter.h
new file mode 100644
index 000000000..fdbd0268c
--- /dev/null
+++ b/Source/GlideHQ/TxFilter.h
@@ -0,0 +1,81 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXFILTER_H__
+#define __TXFILTER_H__
+
+#include "TxInternal.h"
+#include "TxQuantize.h"
+#include "TxHiResCache.h"
+#include "TxTexCache.h"
+#include "TxUtil.h"
+#include "TxImage.h"
+#include <string>
+
+/*
+ * Texture filter front end: enhances, converts, compresses, caches and
+ * dumps textures, and looks up hires replacements.
+ */
+class TxFilter
+{
+private:
+  int _numcore; /* NOTE(review): presumably the CPU core count; not referenced in the code visible here -- confirm */
+
+  /* scratch buffers; each processing stage writes into whichever of the
+   * two is not its current input */
+  uint8 *_tex1;
+  uint8 *_tex2;
+  /* enhancement scalers are skipped when the source exceeds these limits
+   * shifted down by the scale factor */
+  int _maxwidth;
+  int _maxheight;
+  int _maxbpp;    /* below 32, ARGB8888 results are reduced to ARGB4444 */
+  int _options;   /* bit flags, tested through the *_MASK constants */
+  int _cacheSize; /* non-zero enables the filtered texture cache */
+  /* identifier and base path used to build the texture dump location */
+  std::wstring _ident;
+  std::wstring _path;
+  TxQuantize *_txQuantize;
+  TxTexCache *_txTexCache;
+  TxHiResCache *_txHiResCache;
+  TxUtil *_txUtil;
+  TxImage *_txImage;
+  boolean _initialized; /* checked by dmptx() before doing any work */
+  void clear();
+public:
+  ~TxFilter();
+  TxFilter(int maxwidth,
+           int maxheight,
+           int maxbpp,
+           int options,
+           int cachesize,
+           wchar_t *path,
+           wchar_t *ident,
+           dispInfoFuncExt callback);
+  /* filter a texture and fill in info; returns 1 on success, 0 on failure */
+  boolean filter(uint8 *src,
+                  int srcwidth,
+                  int srcheight,
+                  uint16 srcformat,
+                  uint64 g64crc, /* glide64 crc, 64bit for future use */
+                  GHQTexInfo *info);
+  /* look up a hires or cached texture; returns 1 on a hit, 0 otherwise */
+  boolean hirestex(uint64 g64crc, /* glide64 crc, 64bit for future use */
+                      uint64 r_crc64,   /* checksum hi:palette low:texture */
+                      uint16 *palette,
+                      GHQTexInfo *info);
+  /* returns 0 unless hires textures or texture dumping are enabled */
+  uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette);
+  /* write the texture to disk as PNG; returns 1 when the file was written */
+  boolean dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64);
+  /* reload the hires texture pack and update the hires option bits */
+  boolean reloadhirestex();
+};
+
+#endif /* __TXFILTER_H__ */
diff --git a/Source/GlideHQ/TxFilterExport.cpp b/Source/GlideHQ/TxFilterExport.cpp
new file mode 100644
index 000000000..7447c3c95
--- /dev/null
+++ b/Source/GlideHQ/TxFilterExport.cpp
@@ -0,0 +1,105 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef WIN32
+#pragma warning(disable: 4786)
+#endif
+
+#include "TxFilter.h"
+
+/* the single filter instance; created by txfilter_init() and destroyed by
+ * txfilter_shutdown(). NULL while no filter exists. */
+TxFilter *txFilter = NULL;
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+/*
+ * Create the global filter instance.
+ *
+ * Returns 0 when an instance already exists (the existing one is left
+ * untouched) or when no instance could be created, 1 otherwise.
+ */
+TAPI boolean TAPIENTRY
+txfilter_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize,
+              wchar_t *path, wchar_t*ident,
+              dispInfoFuncExt callback)
+{
+  if (txFilter != NULL) {
+    return 0;
+  }
+
+  txFilter = new TxFilter(maxwidth, maxheight, maxbpp, options, cachesize,
+                           path, ident, callback);
+
+  if (txFilter == NULL) {
+    return 0;
+  }
+  return 1;
+}
+
+/*
+ * Destroy the global filter instance, if any, and reset the pointer so
+ * that txfilter_init() may be called again.
+ */
+void txfilter_shutdown(void)
+{
+  if (txFilter != NULL) {
+    delete txFilter;
+    txFilter = NULL;
+  }
+}
+
+/*
+ * Exported wrapper for TxFilter::filter(); returns 0 when no filter
+ * instance exists.
+ */
+TAPI boolean TAPIENTRY
+txfilter(uint8 *src, int srcwidth, int srcheight, uint16 srcformat,
+         uint64 g64crc, GHQTexInfo *info)
+{
+  if (!txFilter) {
+    return 0;
+  }
+
+  return txFilter->filter(src, srcwidth, srcheight, srcformat,
+                          g64crc, info);
+}
+
+/*
+ * Exported wrapper for TxFilter::hirestex(); returns 0 when no filter
+ * instance exists.
+ */
+TAPI boolean TAPIENTRY
+txfilter_hirestex(uint64 g64crc, uint64 r_crc64, uint16 *palette, GHQTexInfo *info)
+{
+  if (!txFilter) {
+    return 0;
+  }
+
+  return txFilter->hirestex(g64crc, r_crc64, palette, info);
+}
+
+/*
+ * Exported wrapper for TxFilter::checksum64(); returns 0 when no filter
+ * instance exists.
+ */
+TAPI uint64 TAPIENTRY
+txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  if (!txFilter) {
+    return 0;
+  }
+
+  return txFilter->checksum64(src, width, height, size, rowStride, palette);
+}
+
+/*
+ * Exported wrapper for TxFilter::dmptx(); returns 0 when no filter
+ * instance exists.
+ */
+TAPI boolean TAPIENTRY
+txfilter_dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64)
+{
+  if (!txFilter) {
+    return 0;
+  }
+
+  return txFilter->dmptx(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
+}
+
+/*
+ * Exported wrapper for TxFilter::reloadhirestex(); returns 0 when no
+ * filter instance exists.
+ */
+TAPI boolean TAPIENTRY
+txfilter_reloadhirestex()
+{
+  if (!txFilter) {
+    return 0;
+  }
+
+  return txFilter->reloadhirestex();
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Source/GlideHQ/TxHiResCache.cpp b/Source/GlideHQ/TxHiResCache.cpp
new file mode 100644
index 000000000..d1c2afdac
--- /dev/null
+++ b/Source/GlideHQ/TxHiResCache.cpp
@@ -0,0 +1,1084 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* 2007 Gonetz <gonetz(at)ngs.ru>
+ * Added callback to display hires texture info. */
+
+#ifdef WIN32
+#pragma warning(disable: 4786)
+#endif
+
+/* dump processed hirestextures to disk
+ * (0:disable, 1:enable) */
+#define DUMP_CACHE 1
+
+/* handle oversized textures by
+ *   0: minification
+ *   1: Glide64 style tiling
+ */
+#define TEXTURE_TILING 1
+
+/* use power of 2 texture size
+ * (0:disable, 1:enable, 2:3dfx) */
+#define POW2_TEXTURES 2
+
+/* enabling tiling overrides the setting above and forces
+ * POW2_TEXTURES to 2 (3dfx) */
+#if TEXTURE_TILING
+#undef POW2_TEXTURES
+#define POW2_TEXTURES 2
+#endif
+
+/* hack to reduce texture footprint to achieve
+ * better performance on midrange gfx cards.
+ * (0:disable, 1:enable) */
+#define REDUCE_TEXTURE_FOOTPRINT 0
+
+/* use aggressive format assumption for quantization
+ * (0:disable, 1:enable, 2:extreme) */
+#define AGGRESSIVE_QUANTIZATION 1
+
+#include "TxHiResCache.h"
+#include "TxDbg.h"
+#include <zlib/zlib.h>
+#include <string>
+#include <common/path.h>
+
+/*
+ * Optionally write the hires cache to disk, then release the helper objects.
+ *
+ * The cache is saved only when dumping is enabled in the options, the
+ * contents did not come from a cache file in the first place and loading
+ * was not aborted.
+ */
+TxHiResCache::~TxHiResCache()
+{
+#if DUMP_CACHE
+  if ((_options & DUMP_HIRESTEXCACHE) && !_haveCache && !_abortLoad) {
+    /* option bits stored with the file; the constructor passes the same
+     * selection when loading the cache back */
+    int cfgbits = _options & (HIRESTEXTURES_MASK|COMPRESS_HIRESTEX|COMPRESSION_MASK|TILE_HIRESTEX|FORCE16BPP_HIRESTEX|GZ_HIRESTEXCACHE|LET_TEXARTISTS_FLY);
+
+    /* dump cache to disk */
+    std::wstring cachefile = _ident + L"_HIRESTEXTURES.dat";
+
+    CPath cachedir(stdstr().FromUTF16(_path.c_str()).c_str(),"");
+    cachedir.AppendDirectory("cache");
+
+    TxCache::save(stdstr(cachedir).ToUTF16().c_str(), cachefile.c_str(), cfgbits);
+  }
+#endif
+
+  delete _txImage;
+  delete _txQuantize;
+  delete _txReSample;
+}
+
+/*
+ * Set up the hires texture cache.
+ *
+ * The base TxCache is constructed with GZ_TEXCACHE masked out of the
+ * options. When a path and an identifier are available, the cache is
+ * filled from the on-disk cache file (if dumping is enabled) or, failing
+ * that, from the texture pack via load(0). Without a path or identifier,
+ * cache dumping is switched off and nothing is loaded.
+ */
+TxHiResCache::TxHiResCache(int maxwidth, int maxheight, int maxbpp, int options,
+                           const wchar_t *path, const wchar_t *ident,
+                           dispInfoFuncExt callback
+                           ) : TxCache((options & ~GZ_TEXCACHE), 0, path, ident, callback)
+{
+  _txImage = new TxImage();
+  _txQuantize  = new TxQuantize();
+  _txReSample = new TxReSample();
+
+  _haveCache = 0;
+  _abortLoad = 0;
+  _maxbpp    = maxbpp;
+  _maxheight = maxheight;
+  _maxwidth  = maxwidth;
+
+  /* assert local options: compression bits only count for hires textures
+   * when COMPRESS_HIRESTEX is set */
+  if (!(_options & COMPRESS_HIRESTEX)) {
+    _options &= ~COMPRESSION_MASK;
+  }
+
+  if (_path.empty() || _ident.empty()) {
+    _options &= ~DUMP_HIRESTEXCACHE;
+    return;
+  }
+
+#if DUMP_CACHE
+  /* read in hires texture cache */
+  if (_options & DUMP_HIRESTEXCACHE) {
+    /* option bits the cache file is loaded with */
+    int cfgbits = _options & (HIRESTEXTURES_MASK|COMPRESS_HIRESTEX|COMPRESSION_MASK|TILE_HIRESTEX|FORCE16BPP_HIRESTEX|GZ_HIRESTEXCACHE|LET_TEXARTISTS_FLY);
+
+    /* find it on disk */
+    std::wstring cachefile = _ident + L"_HIRESTEXTURES.dat";
+    CPath cachedir(stdstr().FromUTF16(_path.c_str()).c_str(),"");
+    cachedir.AppendDirectory("cache");
+
+    _haveCache = TxCache::load(stdstr(cachedir).ToUTF16().c_str(), cachefile.c_str(), cfgbits);
+  }
+#endif
+
+  /* read in hires textures */
+  if (!_haveCache) {
+    TxHiResCache::load(0);
+  }
+}
+
+/* Returns non-zero when the cache holds no entries. */
+boolean
+TxHiResCache::empty()
+{
+  return _cache.empty() ? 1 : 0;
+}
+
+/*
+ * (Re)load hires textures from the texture pack.
+ *
+ * With replace == 0 the cache is cleared first; a non-zero replace keeps
+ * the existing entries. Only the Rice pack format triggers any loading;
+ * the GHQ and JABO selections are accepted but load nothing here.
+ *
+ * Returns 0 when the path or identifier is empty, 1 otherwise (the result
+ * of loadHiResTextures() is not taken into account).
+ */
+boolean
+TxHiResCache::load(boolean replace) /* 0 : reload, 1 : replace partial */
+{
+  if (_path.empty() || _ident.empty()) {
+    return 0;
+  }
+
+  if (!replace) {
+    TxCache::clear();
+  }
+
+  CPath dir_path(stdstr().FromUTF16(_path.c_str()).c_str(),"");
+
+  switch (_options & HIRESTEXTURES_MASK) {
+  case RICE_HIRESTEXTURES:
+    INFO(80, L"-----\n");
+    INFO(80, L"using Rice hires texture format...\n");
+    INFO(80, L"  must be one of the following;\n");
+    INFO(80, L"    1) *_rgb.png + *_a.png\n");
+    INFO(80, L"    2) *_all.png\n");
+    INFO(80, L"    3) *_ciByRGBA.png\n");
+    INFO(80, L"    4) *_allciByRGBA.png\n");
+    INFO(80, L"    5) *_ci.bmp\n");
+    INFO(80, L"  usage of only 2) and 3) highly recommended!\n");
+    INFO(80, L"  folder names must be in US-ASCII characters!\n");
+
+    /* textures are read from the "hires_texture" directory appended to the
+     * base path, followed by the identifier */
+    dir_path.AppendDirectory("hires_texture");
+    dir_path.AppendDirectory(stdstr().FromUTF16(_ident.c_str()).c_str());
+    loadHiResTextures(dir_path, replace);
+    break;
+  case GHQ_HIRESTEXTURES:
+  case JABO_HIRESTEXTURES:
+    /* nothing to load for these formats */
+    break;
+  }
+
+  return 1;
+}
+
+boolean
+TxHiResCache::loadHiResTextures(LPCSTR dir_path, boolean replace)
+{
+  DBG_INFO(80, L"-----\n");
+  DBG_INFO(80, L"path: %s\n", dir_path);
+
+//  _asm int 3
+#ifdef tofix
+  /* find it on disk */
+  if (!boost::filesystem::exists(dir_path)) {
+    INFO(80, L"Error: path not found!\n");
+    return 0;
+  }
+
+  /* XXX: deal with UNICODE fiasco!
+   * stupidity flows forth beneath this...
+   *
+   * I opted to use chdir in order to use fopen() for windows 9x.
+   */
+#ifdef WIN32
+  wchar_t curpath[MAX_PATH];
+  GETCWD(MAX_PATH, curpath);
+  CHDIR(dir_path.string().c_str());
+#else
+  char curpath[MAX_PATH];
+  char cbuf[MAX_PATH];
+  wcstombs(cbuf, dir_path.string().c_str(), MAX_PATH);
+  GETCWD(MAX_PATH, curpath);
+  CHDIR(cbuf);
+#endif
+
+  /* NOTE: I could use the boost::wdirectory_iterator and boost::wpath
+   * to resolve UNICODE file names and paths. But then, _wfopen() is
+   * required to get the file descriptor for MS Windows to pass into
+   * libpng, which is incompatible with Win9x. Win9x's fopen() cannot
+   * handle UNICODE names. UNICODE capable boost::filesystem is available
+   * with Boost1.34.1 built with VC8.0 (bjam --toolset=msvc-8.0 stage).
+   *
+   * RULE OF THUMB: NEVER save texture packs in NON-ASCII names!!
+   */
+  boost::filesystem::wdirectory_iterator it(dir_path);
+  boost::filesystem::wdirectory_iterator end_it; /* default construction yields past-the-end */
+
+  for (; it != end_it; ++it) {
+
+    if (KBHIT(0x1B)) {
+      _abortLoad = 1;
+      if (_callback) (*_callback)(L"Aborted loading hiresolution texture!\n");
+      INFO(80, L"Error: aborted loading hiresolution texture!\n");
+    }
+    if (_abortLoad) break;
+
+    /* recursive read into sub-directory */
+    if (boost::filesystem::is_directory(it->status())) {
+      loadHiResTextures(it->path(), replace);
+      continue;
+    }
+
+    DBG_INFO(80, L"-----\n");
+    DBG_INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+
+    int width = 0, height = 0;
+    uint16 format = 0;
+    uint8 *tex = NULL;
+    int tmpwidth = 0, tmpheight = 0;
+    uint16 tmpformat = 0;
+    uint8 *tmptex= NULL;
+    int untiled_width = 0, untiled_height = 0;
+    uint16 destformat = 0;
+
+    /* Rice hi-res textures: begin
+     */
+    uint32 chksum = 0, fmt = 0, siz = 0, palchksum = 0;
+    char *pfname = NULL, fname[MAX_PATH];
+    std::string ident;
+    FILE *fp = NULL;
+
+    wcstombs(fname, _ident.c_str(), MAX_PATH);
+    /* XXX case sensitivity fiasco!
+     * files must use _a, _rgb, _all, _allciByRGBA, _ciByRGBA, _ci
+     * and file extensions must be in lower case letters! */
+#ifdef WIN32
+    {
+      unsigned int i;
+      for (i = 0; i < strlen(fname); i++) fname[i] = tolower(fname[i]);
+    }
+#endif
+    ident.assign(fname);
+
+    /* read in Rice's file naming convention */
+#define CRCFMTSIZ_LEN 13
+#define PALCRC_LEN 9
+    wcstombs(fname, it->path().leaf().c_str(), MAX_PATH);
+    /* XXX case sensitivity fiasco!
+     * files must use _a, _rgb, _all, _allciByRGBA, _ciByRGBA, _ci
+     * and file extensions must be in lower case letters! */
+#ifdef WIN32
+    {
+      unsigned int i;
+      for (i = 0; i < strlen(fname); i++) fname[i] = tolower(fname[i]);
+    }
+#endif
+    pfname = fname + strlen(fname) - 4;
+    if (!(pfname == strstr(fname, ".png") ||
+          pfname == strstr(fname, ".bmp") ||
+          pfname == strstr(fname, ".dds"))) {
+#if !DEBUG
+      INFO(80, L"-----\n");
+      INFO(80, L"path: %ls\n", dir_path.string().c_str());
+      INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+      INFO(80, L"Error: not png or bmp or dds!\n");
+      continue;
+    }
+    pfname = strstr(fname, ident.c_str());
+    if (pfname != fname) pfname = 0;
+    if (pfname) {
+      if (sscanf(pfname + ident.size(), "#%08X#%01X#%01X#%08X", &chksum, &fmt, &siz, &palchksum) == 4)
+        pfname += (ident.size() + CRCFMTSIZ_LEN + PALCRC_LEN);
+      else if (sscanf(pfname + ident.size(), "#%08X#%01X#%01X", &chksum, &fmt, &siz) == 3)
+        pfname += (ident.size() + CRCFMTSIZ_LEN);
+      else
+        pfname = 0;
+    }
+    if (!pfname) {
+#if !DEBUG
+      INFO(80, L"-----\n");
+      INFO(80, L"path: %ls\n", dir_path.string().c_str());
+      INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+      INFO(80, L"Error: not Rice texture naming convention!\n");
+      continue;
+    }
+    if (!chksum) {
+#if !DEBUG
+      INFO(80, L"-----\n");
+      INFO(80, L"path: %ls\n", dir_path.string().c_str());
+      INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+      INFO(80, L"Error: crc32 = 0!\n");
+      continue;
+    }
+
+    /* check if we already have it in hires texture cache */
+    if (!replace) {
+      uint64 chksum64 = (uint64)palchksum;
+      chksum64 <<= 32;
+      chksum64 |= (uint64)chksum;
+      if (TxCache::is_cached(chksum64)) {
+#if !DEBUG
+        INFO(80, L"-----\n");
+        INFO(80, L"path: %ls\n", dir_path.string().c_str());
+        INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+        INFO(80, L"Error: already cached! duplicate texture!\n");
+        continue;
+      }
+    }
+
+    DBG_INFO(80, L"rom: %ls chksum:%08X %08X fmt:%x size:%x\n", _ident.c_str(), chksum, palchksum, fmt, siz);
+
+    /* Deal with the wackiness some texture packs utilize Rice format.
+     * Read in the following order: _a.* + _rgb.*, _all.png _ciByRGBA.png,
+     * _allciByRGBA.png, and _ci.bmp. PNG are prefered over BMP.
+     *
+     * For some reason there are texture packs that include them all. Some
+     * even have RGB textures named as _all.* and ARGB textures named as
+     * _rgb.*... Someone pleeeez write a GOOD guideline for the texture
+     * designers!!!
+     *
+     * We allow hires textures to have higher bpp than the N64 originals.
+     */
+    /* N64 formats
+     * Format: 0 - RGBA, 1 - YUV, 2 - CI, 3 - IA, 4 - I
+     * Size:   0 - 4bit, 1 - 8bit, 2 - 16bit, 3 - 32 bit
+     */
+
+    /*
+     * read in _rgb.* and _a.*
+     */
+    if (pfname == strstr(fname, "_rgb.") || pfname == strstr(fname, "_a.")) {
+      strcpy(pfname, "_rgb.png");
+      if (!boost::filesystem::exists(fname)) {
+        strcpy(pfname, "_rgb.bmp");
+        if (!boost::filesystem::exists(fname)) {
+#if !DEBUG
+          INFO(80, L"-----\n");
+          INFO(80, L"path: %ls\n", dir_path.string().c_str());
+          INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+          INFO(80, L"Error: missing _rgb.*! _a.* must be paired with _rgb.*!\n");
+          continue;
+        }
+      }
+      /* _a.png */
+      strcpy(pfname, "_a.png");
+      if ((fp = fopen(fname, "rb")) != NULL) {
+        tmptex = _txImage->readPNG(fp, &tmpwidth, &tmpheight, &tmpformat);
+        fclose(fp);
+      }
+      if (!tmptex) {
+        /* _a.bmp */
+        strcpy(pfname, "_a.bmp");
+        if ((fp = fopen(fname, "rb")) != NULL) {
+          tmptex = _txImage->readBMP(fp, &tmpwidth, &tmpheight, &tmpformat);
+          fclose(fp);
+        }
+      }
+      /* _rgb.png */
+      strcpy(pfname, "_rgb.png");
+      if ((fp = fopen(fname, "rb")) != NULL) {
+        tex = _txImage->readPNG(fp, &width, &height, &format);
+        fclose(fp);
+      }
+      if (!tex) {
+        /* _rgb.bmp */
+        strcpy(pfname, "_rgb.bmp");
+        if ((fp = fopen(fname, "rb")) != NULL) {
+          tex = _txImage->readBMP(fp, &width, &height, &format);
+          fclose(fp);
+        }
+      }
+      if (tmptex) {
+        /* check if _rgb.* and _a.* have matching size and format. */
+        if (!tex || width != tmpwidth || height != tmpheight ||
+            format != GR_TEXFMT_ARGB_8888 || tmpformat != GR_TEXFMT_ARGB_8888) {
+#if !DEBUG
+          INFO(80, L"-----\n");
+          INFO(80, L"path: %ls\n", dir_path.string().c_str());
+          INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+          if (!tex) {
+            INFO(80, L"Error: missing _rgb.*!\n");
+          } else if (width != tmpwidth || height != tmpheight) {
+            INFO(80, L"Error: _rgb.* and _a.* have mismatched width or height!\n");
+          } else if (format != GR_TEXFMT_ARGB_8888 || tmpformat != GR_TEXFMT_ARGB_8888) {
+            INFO(80, L"Error: _rgb.* or _a.* not in 32bit color!\n");
+          }
+          if (tex) free(tex);
+          if (tmptex) free(tmptex);
+          tex = NULL;
+          tmptex = NULL;
+          continue;
+        }
+      }
+      /* make adjustments */
+      if (tex) {
+        if (tmptex) {
+          /* merge (A)RGB and A comp */
+          DBG_INFO(80, L"merge (A)RGB and A comp\n");
+          int i;
+          for (i = 0; i < height * width; i++) {
+#if 1
+            /* use R comp for alpha. this is what Rice uses. sigh... */
+            ((uint32*)tex)[i] &= 0x00ffffff;
+            ((uint32*)tex)[i] |= ((((uint32*)tmptex)[i] & 0x00ff0000) << 8);
+#endif
+#if 0
+            /* use libpng style grayscale conversion */
+            uint32 texel = ((uint32*)tmptex)[i];
+            uint32 acomp = (((texel >> 16) & 0xff) * 6969 +
+                            ((texel >>  8) & 0xff) * 23434 +
+                            ((texel      ) & 0xff) * 2365) / 32768;
+            ((uint32*)tex)[i] = (acomp << 24) | (((uint32*)tex)[i] & 0x00ffffff);
+#endif
+#if 0
+            /* use the standard NTSC gray scale conversion */
+            uint32 texel = ((uint32*)tmptex)[i];
+            uint32 acomp = (((texel >> 16) & 0xff) * 299 +
+                            ((texel >>  8) & 0xff) * 587 +
+                            ((texel      ) & 0xff) * 114) / 1000;
+            ((uint32*)tex)[i] = (acomp << 24) | (((uint32*)tex)[i] & 0x00ffffff);
+#endif
+          }
+          free(tmptex);
+          tmptex = NULL;
+        } else {
+          /* clobber A comp. never a question of alpha. only RGB used. */
+#if !DEBUG
+          INFO(80, L"-----\n");
+          INFO(80, L"path: %ls\n", dir_path.string().c_str());
+          INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+          INFO(80, L"Warning: missing _a.*! only using _rgb.*. treat as opaque texture.\n");
+          int i;
+          for (i = 0; i < height * width; i++) {
+            ((uint32*)tex)[i] |= 0xff000000;
+          }
+        }
+      }
+    } else
+
+    /*
+     * read in _all.png, _all.dds, _allciByRGBA.png, _allciByRGBA.dds
+     * _ciByRGBA.png, _ciByRGBA.dds, _ci.bmp
+     */
+    if (pfname == strstr(fname, "_all.png") ||
+        pfname == strstr(fname, "_all.dds") ||
+#ifdef WIN32
+        pfname == strstr(fname, "_allcibyrgba.png") ||
+        pfname == strstr(fname, "_allcibyrgba.dds") ||
+        pfname == strstr(fname, "_cibyrgba.png") ||
+        pfname == strstr(fname, "_cibyrgba.dds") ||
+#else
+        pfname == strstr(fname, "_allciByRGBA.png") ||
+        pfname == strstr(fname, "_allciByRGBA.dds") ||
+        pfname == strstr(fname, "_ciByRGBA.png") ||
+        pfname == strstr(fname, "_ciByRGBA.dds") ||
+#endif
+        pfname == strstr(fname, "_ci.bmp")) {
+      if ((fp = fopen(fname, "rb")) != NULL) {
+        if      (strstr(fname, ".png")) tex = _txImage->readPNG(fp, &width, &height, &format);
+        else if (strstr(fname, ".dds")) tex = _txImage->readDDS(fp, &width, &height, &format);
+        else                            tex = _txImage->readBMP(fp, &width, &height, &format);
+        fclose(fp);
+      }
+      /* XXX: auto-adjustment of dxt dds textures unsupported for now */
+      if (tex && strstr(fname, ".dds")) {
+        const float aspectratio = (width > height) ? (float)width/(float)height : (float)height/(float)width;
+        if (!(aspectratio == 1.0 ||
+              aspectratio == 2.0 ||
+              aspectratio == 4.0 ||
+              aspectratio == 8.0)) {
+          free(tex);
+          tex = NULL;
+#if !DEBUG
+          INFO(80, L"-----\n");
+          INFO(80, L"path: %ls\n", dir_path.string().c_str());
+          INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+          INFO(80, L"Error: W:H aspect ratio range not 8:1 - 1:8!\n");
+          continue;
+        }
+        if (width  != _txReSample->nextPow2(width) ||
+            height != _txReSample->nextPow2(height)) {
+          free(tex);
+          tex = NULL;
+#if !DEBUG
+          INFO(80, L"-----\n");
+          INFO(80, L"path: %ls\n", dir_path.string().c_str());
+          INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+          INFO(80, L"Error: not power of 2 size!\n");
+          continue;
+        }
+      }
+    }
+
+    /* if we do not have a texture at this point we are screwed */
+    if (!tex) {
+#if !DEBUG
+      INFO(80, L"-----\n");
+      INFO(80, L"path: %ls\n", dir_path.string().c_str());
+      INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+      INFO(80, L"Error: load failed!\n");
+      continue;
+    }
+    DBG_INFO(80, L"read in as %d x %d gfmt:%x\n", tmpwidth, tmpheight, tmpformat);
+
+    /* check if size and format are OK */
+    if (!(format == GR_TEXFMT_ARGB_8888     ||
+          format == GR_TEXFMT_P_8           ||
+          format == GR_TEXFMT_ARGB_CMP_DXT1 ||
+          format == GR_TEXFMT_ARGB_CMP_DXT3 ||
+          format == GR_TEXFMT_ARGB_CMP_DXT5) ||
+        (width * height) < 4) { /* TxQuantize requirement: width * height must be 4 or larger. */
+      free(tex);
+      tex = NULL;
+#if !DEBUG
+      INFO(80, L"-----\n");
+      INFO(80, L"path: %ls\n", dir_path.string().c_str());
+      INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+      INFO(80, L"Error: not width * height > 4 or 8bit palette color or 32bpp or dxt1 or dxt3 or dxt5!\n");
+      continue;
+    }
+
+    /* analyze and determine best format to quantize */
+    if (format == GR_TEXFMT_ARGB_8888) {
+      int i;
+      int alphabits = 0;
+      int fullalpha = 0;
+      boolean intensity = 1;
+
+      if (!(_options & LET_TEXARTISTS_FLY)) {
+        /* HACK ALERT! */
+        /* Account for Rice's weirdness with fmt:0 siz:2 textures.
+         * Although the conditions are relaxed with other formats,
+         * the D3D RGBA5551 surface is used for this format in certain
+         * cases. See Nintemod's SuperMario64 life gauge and power
+         * meter. The same goes for fmt:2 textures. See Mollymutt's
+         * PaperMario text. */
+        if ((fmt == 0 && siz == 2) || fmt == 2) {
+          DBG_INFO(80, L"Remove black, white, etc borders along the alpha edges.\n");
+          /* round A comp */
+          for (i = 0; i < height * width; i++) {
+            uint32 texel = ((uint32*)tex)[i];
+            ((uint32*)tex)[i] = ((texel & 0xff000000) == 0xff000000 ? 0xff000000 : 0) |
+                                (texel & 0x00ffffff);
+          }
+          /* Substitute texel color with the average of the surrounding
+           * opaque texels. This removes borders regardless of hardware
+           * texture filtering (bilinear, etc). */
+          int j;
+          for (i = 0; i < height; i++) {
+            for (j = 0; j < width; j++) {
+              uint32 texel = ((uint32*)tex)[i * width + j];
+              if ((texel & 0xff000000) != 0xff000000) {
+                uint32 tmptexel[8];
+                uint32 k, numtexel, r, g, b;
+                numtexel = r = g = b = 0;
+                memset(&tmptexel, 0, sizeof(tmptexel));
+                if (i > 0) {
+                  tmptexel[0] = ((uint32*)tex)[(i - 1) * width + j];                        /* north */
+                  if (j > 0)         tmptexel[1] = ((uint32*)tex)[(i - 1) * width + j - 1]; /* north-west */
+                  if (j < width - 1) tmptexel[2] = ((uint32*)tex)[(i - 1) * width + j + 1]; /* north-east */
+                }
+                if (i < height - 1) {
+                  tmptexel[3] = ((uint32*)tex)[(i + 1) * width + j];                        /* south */
+                  if (j > 0)         tmptexel[4] = ((uint32*)tex)[(i + 1) * width + j - 1]; /* south-west */
+                  if (j < width - 1) tmptexel[5] = ((uint32*)tex)[(i + 1) * width + j + 1]; /* south-east */
+                }
+                if (j > 0)         tmptexel[6] = ((uint32*)tex)[i * width + j - 1]; /* west */
+                if (j < width - 1) tmptexel[7] = ((uint32*)tex)[i * width + j + 1]; /* east */
+                for (k = 0; k < 8; k++) {
+                  if ((tmptexel[k] & 0xff000000) == 0xff000000) {
+                    r += ((tmptexel[k] & 0x00ff0000) >> 16);
+                    g += ((tmptexel[k] & 0x0000ff00) >>  8);
+                    b += ((tmptexel[k] & 0x000000ff)      );
+                    numtexel++;
+                  }
+                }
+                if (numtexel) {
+                  ((uint32*)tex)[i * width + j] = ((r / numtexel) << 16) |
+                                                  ((g / numtexel) <<  8) |
+                                                  ((b / numtexel)      );
+                } else {
+                  ((uint32*)tex)[i * width + j] = texel & 0x00ffffff;
+                }
+              }
+            }
+          }
+        }
+      }
+
+      /* simple analysis of texture */
+      for (i = 0; i < height * width; i++) {
+        uint32 texel = ((uint32*)tex)[i];
+        if (alphabits != 8) {
+#if AGGRESSIVE_QUANTIZATION
+          if ((texel & 0xff000000) < 0x00000003) {
+            alphabits = 1;
+            fullalpha++;
+          } else if ((texel & 0xff000000) < 0xfe000000) {
+            alphabits = 8;
+          }
+#else
+          if ((texel & 0xff000000) == 0x00000000) {
+            alphabits = 1;
+            fullalpha++;
+          } else if ((texel & 0xff000000) != 0xff000000) {
+            alphabits = 8;
+          }
+#endif
+        }
+        if (intensity) {
+          int rcomp = (texel >> 16) & 0xff;
+          int gcomp = (texel >>  8) & 0xff;
+          int bcomp = (texel      ) & 0xff;
+#if AGGRESSIVE_QUANTIZATION
+          if (abs(rcomp - gcomp) > 8 || abs(rcomp - bcomp) > 8 || abs(gcomp - bcomp) > 8) intensity = 0;
+#else
+          if (rcomp != gcomp || rcomp != bcomp || gcomp != bcomp) intensity = 0;
+#endif
+        }
+        if (!intensity && alphabits == 8) break;
+      }
+      DBG_INFO(80, L"required alpha bits:%d zero acomp texels:%d rgb as intensity:%d\n", alphabits, fullalpha, intensity);
+
+      /* preparations based on above analysis */
+#if !REDUCE_TEXTURE_FOOTPRINT
+      if (_maxbpp < 32 || _options & (FORCE16BPP_HIRESTEX|COMPRESSION_MASK)) {
+#endif
+        if      (alphabits == 0) destformat = GR_TEXFMT_RGB_565;
+        else if (alphabits == 1) destformat = GR_TEXFMT_ARGB_1555;
+        else                     destformat = GR_TEXFMT_ARGB_8888;
+#if !REDUCE_TEXTURE_FOOTPRINT
+      } else {
+        destformat = GR_TEXFMT_ARGB_8888;
+      }
+#endif
+      if (fmt == 4 && alphabits == 0) {
+        destformat = GR_TEXFMT_ARGB_8888;
+        /* Rice I format; I = (R + G + B) / 3 */
+        for (i = 0; i < height * width; i++) {
+          uint32 texel = ((uint32*)tex)[i];
+          uint32 icomp = (((texel >> 16) & 0xff) +
+                          ((texel >>  8) & 0xff) +
+                          ((texel      ) & 0xff)) / 3;
+          ((uint32*)tex)[i] = (icomp << 24) | (texel & 0x00ffffff);
+        }
+      }
+      if (intensity) {
+        if (alphabits == 0) {
+          if (fmt == 4) destformat = GR_TEXFMT_ALPHA_8;
+          else          destformat = GR_TEXFMT_INTENSITY_8;
+        } else {
+          destformat = GR_TEXFMT_ALPHA_INTENSITY_88;
+        }
+      }
+
+      DBG_INFO(80, L"best gfmt:%x\n", destformat);
+    }
+    /*
+     * Rice hi-res textures: end */
+
+
+    /* XXX: only ARGB8888 for now. comeback to this later... */
+    if (format == GR_TEXFMT_ARGB_8888) {
+
+#if TEXTURE_TILING
+
+      /* Glide64 style texture tiling */
+      /* NOTE: narrow wide textures can be tiled into 256x256 size textures */
+
+      /* adjust texture size to allow tiling for V1, Rush, V2, Banshee, V3 */
+      /* NOTE: we skip this for palette textures that need minification
+       * because it will look ugly. */
+
+      /* minification */
+      {
+        int ratio = 1;
+
+        /* minification to enable glide64 style texture tiling */
+        /* determine the minification ratio to tile the texture into 256x256 size */
+        if ((_options & TILE_HIRESTEX) && _maxwidth >= 256 && _maxheight >= 256) {
+          DBG_INFO(80, L"determine minification ratio to tile\n");
+          tmpwidth = width;
+          tmpheight = height;
+          if (height > 256) {
+            ratio = ((height - 1) >> 8) + 1;
+            tmpwidth = width / ratio;
+            tmpheight = height / ratio;
+            DBG_INFO(80, L"height > 256, minification ratio:%d %d x %d -> %d x %d\n",
+                     ratio, width, height, tmpwidth, tmpheight);
+          }
+          if (tmpwidth > 256 && (((tmpwidth - 1) >> 8) + 1) * tmpheight > 256) {
+            ratio *= ((((((tmpwidth - 1) >> 8) + 1) * tmpheight) - 1) >> 8) + 1;
+            DBG_INFO(80, L"width > 256, minification ratio:%d %d x %d -> %d x %d\n",
+                     ratio, width, height, width / ratio, height / ratio);
+          }
+        } else {
+          /* normal minification to fit max texture size */
+          if (width > _maxwidth || height > _maxheight) {
+            DBG_INFO(80, L"determine minification ratio to fit max texture size\n");
+            tmpwidth = width;
+            tmpheight = height;
+            while (tmpwidth > _maxwidth) {
+              tmpheight >>= 1;
+              tmpwidth >>= 1;
+              ratio <<= 1;
+            }
+            while (tmpheight > _maxheight) {
+              tmpheight >>= 1;
+              tmpwidth >>= 1;
+              ratio <<= 1;
+            }
+            DBG_INFO(80, L"minification ratio:%d %d x %d -> %d x %d\n",
+                     ratio, width, height, tmpwidth, tmpheight);
+          }
+        }
+
+        if (ratio > 1) {
+          if (!_txReSample->minify(&tex, &width, &height, ratio)) {
+            free(tex);
+            tex = NULL;
+            DBG_INFO(80, L"Error: minification failed!\n");
+            continue;
+          }
+        }
+      }
+
+      /* tiling */
+      if ((_options & TILE_HIRESTEX) && _maxwidth >= 256 && _maxheight >= 256) {
+        boolean usetile = 0;
+
+        /* to tile or not to tile, that is the question */
+        if (width > 256 && height <= 128 && (((width - 1) >> 8) + 1) * height <= 256) {
+
+          if (width > _maxwidth) usetile = 1;
+          else {
+            /* tile if the tiled texture memory footprint is smaller */
+            int tilewidth  = 256;
+            int tileheight = _txReSample->nextPow2((((width - 1) >> 8) + 1) * height);
+            tmpwidth  = width;
+            tmpheight = height;
+
+            /* 3dfx Glide3 tmpheight, W:H aspect ratio range (8:1 - 1:8) */
+            if (tilewidth > (tileheight << 3)) tileheight = tilewidth >> 3;
+
+            /* HACKALERT: see TxReSample::pow2(); */
+            if      (tmpwidth  > 64) tmpwidth  -= 4;
+            else if (tmpwidth  > 16) tmpwidth  -= 2;
+            else if (tmpwidth  >  4) tmpwidth  -= 1;
+
+            if      (tmpheight > 64) tmpheight -= 4;
+            else if (tmpheight > 16) tmpheight -= 2;
+            else if (tmpheight >  4) tmpheight -= 1;
+
+            tmpwidth  = _txReSample->nextPow2(tmpwidth);
+            tmpheight = _txReSample->nextPow2(tmpheight);
+
+            /* 3dfx Glide3 tmpheight, W:H aspect ratio range (8:1 - 1:8) */
+            if (tmpwidth > tmpheight) {
+              if (tmpwidth  > (tmpheight << 3)) tmpheight = tmpwidth  >> 3;
+            } else {
+              if (tmpheight > (tmpwidth  << 3)) tmpwidth  = tmpheight >> 3;
+            }
+
+            usetile = (tilewidth * tileheight < tmpwidth * tmpheight);
+          }
+
+        }
+
+        /* tile it! do the actual tiling into 256x256 size */
+        if (usetile) {
+          DBG_INFO(80, L"Glide64 style texture tiling\n");
+
+          int x, y, z, ratio, offset;
+          offset = 0;
+          ratio = ((width - 1) >> 8) + 1;
+          tmptex = (uint8 *)malloc(_txUtil->sizeofTx(256, height * ratio, format));
+          if (tmptex) {
+            for (x = 0; x < ratio; x++) {
+              for (y = 0; y < height; y++) {
+                if (x < ratio - 1) {
+                  memcpy(&tmptex[offset << 2], &tex[(x * 256 + y * width) << 2], 256 << 2);
+                } else {
+                  for (z = 0; z < width - 256 * (ratio - 1); z++) {
+                    ((uint32*)tmptex)[offset + z] = ((uint32*)tex)[x * 256 + y * width + z];
+                  }
+                  for (; z < 256; z++) {
+                    ((uint32*)tmptex)[offset + z] = ((uint32*)tmptex)[offset + z - 1];
+                  }
+                }
+                offset += 256;
+              }
+            }
+            free(tex);
+            tex = tmptex;
+            untiled_width = width;
+            untiled_height = height;
+            width = 256;
+            height *= ratio;
+            DBG_INFO(80, L"Tiled: %d x %d -> %d x %d\n", untiled_width, untiled_height, width, height);
+          }
+        }
+      }
+
+#else  /* TEXTURE_TILING */
+
+      /* minification */
+      if (width > _maxwidth || height > _maxheight) {
+        int ratio = 1;
+        if (width / _maxwidth > height / _maxheight) {
+          ratio = (int)ceil((double)width / _maxwidth);
+        } else {
+          ratio = (int)ceil((double)height / _maxheight);
+        }
+        if (!_txReSample->minify(&tex, &width, &height, ratio)) {
+          free(tex);
+          tex = NULL;
+          DBG_INFO(80, L"Error: minification failed!\n");
+          continue;
+        }
+      }
+
+#endif /* TEXTURE_TILING */
+
+      /* texture compression */
+      if ((_options & COMPRESSION_MASK) &&
+          (width >= 64 && height >= 64) /* Texture compression is not suitable for low pixel coarse detail
+                                         * textures. The assumption here is that textures larger than 64x64
+                                         * have enough detail to produce decent quality when compressed. The
+                                         * down side is that narrow stripped textures that the N64 often use
+                                         * for large background textures are also ignored. It would be more
+                                         * reasonable if decisions are made based on fourier-transform
+                                         * spectrum or RMS error.
+                                         *
+                                         * NOTE: texture size must be checked before expanding to pow2 size.
+                                         */
+          ) {
+        uint32 alpha = 0;
+        int dataSize = 0;
+        int compressionType = _options & COMPRESSION_MASK;
+
+#if POW2_TEXTURES
+#if (POW2_TEXTURES == 2)
+        /* 3dfx Glide3x aspect ratio (8:1 - 1:8) */
+        if (!_txReSample->nextPow2(&tex, &width , &height, 32, 1)) {
+#else
+        /* normal pow2 expansion */
+        if (!_txReSample->nextPow2(&tex, &width , &height, 32, 0)) {
+#endif
+          free(tex);
+          tex = NULL;
+          DBG_INFO(80, L"Error: aspect ratio adjustment failed!\n");
+          continue;
+        }
+#endif
+
+        switch (_options & COMPRESSION_MASK) {
+        case S3TC_COMPRESSION:
+          switch (destformat) {
+          case GR_TEXFMT_ARGB_8888:
+#if GLIDE64_DXTN
+          case GR_TEXFMT_ARGB_1555: /* for ARGB1555 use DXT5 instead of DXT1 */
+#endif
+          case GR_TEXFMT_ALPHA_INTENSITY_88:
+            dataSize = width * height;
+            break;
+#if !GLIDE64_DXTN
+          case GR_TEXFMT_ARGB_1555:
+#endif
+          case GR_TEXFMT_RGB_565:
+          case GR_TEXFMT_INTENSITY_8:
+            dataSize = (width * height) >> 1;
+            break;
+          case GR_TEXFMT_ALPHA_8: /* no size benefit with dxtn */
+            ;
+          }
+          break;
+        case FXT1_COMPRESSION:
+          switch (destformat) {
+          case GR_TEXFMT_ARGB_1555:
+          case GR_TEXFMT_RGB_565:
+          case GR_TEXFMT_INTENSITY_8:
+            dataSize = (width * height) >> 1;
+            break;
+            /* XXX: textures that use 8bit alpha channel look bad with the current
+             * fxt1 library, so we substitute it with dxtn for now. afaik all gfx
+             * cards that support fxt1 also support dxtn. (3dfx and Intel) */
+          case GR_TEXFMT_ALPHA_INTENSITY_88:
+          case GR_TEXFMT_ARGB_8888:
+            compressionType = S3TC_COMPRESSION;
+            dataSize = width * height;
+            break;
+          case GR_TEXFMT_ALPHA_8: /* no size benefit with dxtn */
+            ;
+          }
+        }
+        /* compress it! */
+        if (dataSize) {
+#if 0 /* TEST: dither before compression for better results with gradients */
+          tmptex = (uint8 *)malloc(_txUtil->sizeofTx(width, height, destformat));
+          if (tmptex) {
+            if (_txQuantize->quantize(tex, tmptex, width, height, GR_TEXFMT_ARGB_8888, destformat, 0))
+              _txQuantize->quantize(tmptex, tex, width, height, destformat, GR_TEXFMT_ARGB_8888, 0);
+            free(tmptex);
+          }
+#endif
+          tmptex = (uint8 *)malloc(dataSize);
+          if (tmptex) {
+            if (_txQuantize->compress(tex, tmptex,
+                                      width, height, destformat,
+                                      &tmpwidth, &tmpheight, &tmpformat,
+                                      compressionType)) {
+              free(tex);
+              tex = tmptex;
+              width = tmpwidth;
+              height = tmpheight;
+              format = destformat = tmpformat;
+            } else {
+              free(tmptex);
+            }
+          }
+        }
+
+      } else {
+
+#if POW2_TEXTURES
+#if (POW2_TEXTURES == 2)
+        /* 3dfx Glide3x aspect ratio (8:1 - 1:8) */
+        if (!_txReSample->nextPow2(&tex, &width , &height, 32, 1)) {
+#else
+        /* normal pow2 expansion */
+        if (!_txReSample->nextPow2(&tex, &width , &height, 32, 0)) {
+#endif
+          free(tex);
+          tex = NULL;
+          DBG_INFO(80, L"Error: aspect ratio adjustment failed!\n");
+          continue;
+        }
+#endif
+      }
+
+      /* quantize */
+      {
+        tmptex = (uint8 *)malloc(_txUtil->sizeofTx(width, height, destformat));
+        if (tmptex) {
+          switch (destformat) {
+          case GR_TEXFMT_ARGB_8888:
+          case GR_TEXFMT_ARGB_4444:
+#if !REDUCE_TEXTURE_FOOTPRINT
+            if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX)
+#endif
+              destformat = GR_TEXFMT_ARGB_4444;
+            break;
+          case GR_TEXFMT_ARGB_1555:
+#if !REDUCE_TEXTURE_FOOTPRINT
+            if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX)
+#endif
+              destformat = GR_TEXFMT_ARGB_1555;
+            break;
+          case GR_TEXFMT_RGB_565:
+#if !REDUCE_TEXTURE_FOOTPRINT
+            if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX)
+#endif
+              destformat = GR_TEXFMT_RGB_565;
+            break;
+          case GR_TEXFMT_ALPHA_INTENSITY_88:
+          case GR_TEXFMT_ALPHA_INTENSITY_44:
+#if !REDUCE_TEXTURE_FOOTPRINT
+            destformat = GR_TEXFMT_ALPHA_INTENSITY_88;
+#else
+            destformat = GR_TEXFMT_ALPHA_INTENSITY_44;
+#endif
+            break;
+          case GR_TEXFMT_ALPHA_8:
+            destformat = GR_TEXFMT_ALPHA_8; /* yes, this is correct. ALPHA_8 instead of INTENSITY_8 */
+            break;
+          case GR_TEXFMT_INTENSITY_8:
+            destformat = GR_TEXFMT_INTENSITY_8;
+          }
+          if (_txQuantize->quantize(tex, tmptex, width, height, GR_TEXFMT_ARGB_8888, destformat, 0)) {
+            format = destformat;
+            free(tex);
+            tex = tmptex;
+          }
+        }
+      }
+
+    }
+
+
+    /* last minute validations */
+    if (!tex || !chksum || !width || !height || !format || width > _maxwidth || height > _maxheight) {
+#if !DEBUG
+      INFO(80, L"-----\n");
+      INFO(80, L"path: %ls\n", dir_path.string().c_str());
+      INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+      if (tex) {
+        free(tex);
+        tex = NULL;
+        INFO(80, L"Error: bad format or size! %d x %d gfmt:%x\n", width, height, format);
+      } else {
+        INFO(80, L"Error: load failed!!\n");
+      }
+      continue;
+    }
+
+    /* load it into hires texture cache. */
+    {
+      uint64 chksum64 = (uint64)palchksum;
+      chksum64 <<= 32;
+      chksum64 |= (uint64)chksum;
+
+      GHQTexInfo tmpInfo;
+      memset(&tmpInfo, 0, sizeof(GHQTexInfo));
+
+      tmpInfo.data = tex;
+      tmpInfo.width = width;
+      tmpInfo.height = height;
+      tmpInfo.format = format;
+      tmpInfo.largeLodLog2 = _txUtil->grLodLog2(width, height);
+      tmpInfo.smallLodLog2 = tmpInfo.largeLodLog2;
+      tmpInfo.aspectRatioLog2 = _txUtil->grAspectRatioLog2(width, height);
+      tmpInfo.is_hires_tex = 1;
+
+#if TEXTURE_TILING
+      /* Glide64 style texture tiling. */
+      if (untiled_width && untiled_height) {
+        tmpInfo.tiles = ((untiled_width - 1) >> 8) + 1;
+        tmpInfo.untiled_width = untiled_width;
+        tmpInfo.untiled_height = untiled_height;
+      }
+#endif
+
+      /* remove redundant in cache */
+      if (replace && TxCache::del(chksum64)) {
+        DBG_INFO(80, L"removed duplicate old cache.\n");
+      }
+
+      /* add to cache */
+      if (TxCache::add(chksum64, &tmpInfo)) {
+        /* Callback to display hires texture info.
+         * Gonetz <gonetz(at)ngs.ru> */
+        if (_callback) {
+          wchar_t tmpbuf[MAX_PATH];
+          mbstowcs(tmpbuf, fname, MAX_PATH);
+          (*_callback)(L"[%d] total mem:%.2fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, tmpbuf);
+        }
+        DBG_INFO(80, L"texture loaded!\n");
+      }
+      free(tex);
+    }
+
+  }
+
+  CHDIR(curpath);
+#endif
+  return 1;
+}
diff --git a/Source/GlideHQ/TxHiResCache.h b/Source/GlideHQ/TxHiResCache.h
new file mode 100644
index 000000000..58868ffa6
--- /dev/null
+++ b/Source/GlideHQ/TxHiResCache.h
@@ -0,0 +1,59 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXHIRESCACHE_H__
+#define __TXHIRESCACHE_H__
+
+/* support hires textures
+ *   0: disable
+ *   1: enable
+ */
+#define HIRES_TEXTURE 1
+
+#include "TxCache.h"
+#include "TxQuantize.h"
+#include "TxImage.h"
+#include "TxReSample.h"
+
+class TxHiResCache : public TxCache /* hi-res texture cache built on top of TxCache */
+{
+private:
+  int _maxwidth;  /* upper bound on texture width applied while loading */
+  int _maxheight; /* upper bound on texture height applied while loading */
+  int _maxbpp;    /* max bits per pixel; below 32 the loader selects 16-bit formats */
+  boolean _haveCache; /* NOTE(review): presumably set once a cache is available - confirm in .cpp */
+  boolean _abortLoad; /* NOTE(review): presumably asks a running load to stop - confirm in .cpp */
+  TxImage *_txImage;       /* image file reader (readPNG / readBMP / readDDS) */
+  TxQuantize *_txQuantize; /* texel format conversion and compression (quantize / compress) */
+  TxReSample *_txReSample; /* resizing helper (minify / nextPow2) */
+  boolean loadHiResTextures(LPCSTR dir_path, boolean replace); /* NOTE(review): presumably loads the texture files under dir_path into the cache; replace looks like "delete an existing entry with the same checksum first" - confirm */
+public:
+  ~TxHiResCache();
+  TxHiResCache(int maxwidth, int maxheight, int maxbpp, int options, /* presumably seed _maxwidth/_maxheight/_maxbpp and the option flags - TODO confirm */
+               const wchar_t *path, const wchar_t *ident,            /* wide-character strings; their meaning is defined in the .cpp */
+               dispInfoFuncExt callback);                            /* NOTE(review): presumably the info-display callback invoked as textures load - confirm */
+  boolean empty(); /* NOTE(review): presumably reports whether the cache holds no textures - confirm */
+  boolean load(boolean replace); /* public load entry point; NOTE(review): presumably forwards replace to loadHiResTextures - confirm */
+};
+
+#endif /* __TXHIRESCACHE_H__ */
diff --git a/Source/GlideHQ/TxImage.cpp b/Source/GlideHQ/TxImage.cpp
new file mode 100644
index 000000000..ba087d862
--- /dev/null
+++ b/Source/GlideHQ/TxImage.cpp
@@ -0,0 +1,799 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* use power of 2 texture size
+ * (0:disable, 1:enable, 2:3dfx) */
+#define POW2_TEXTURES 0
+
+/* check 8 bytes. use a larger value if needed. */
+#define PNG_CHK_BYTES 8
+
+#include "TxImage.h"
+#include "TxReSample.h"
+#include "TxDbg.h"
+#include <stdlib.h>
+
+boolean
+TxImage::getPNGInfo(FILE *fp, png_structp *png_ptr, png_infop *info_ptr)
+{
+  unsigned char sig[PNG_CHK_BYTES];
+  /* Returns 1 with *png_ptr / *info_ptr created and the header read, else 0. */
+  /* check for valid file pointer */
+  if (!fp)
+    return 0;
+
+  /* check if file is PNG: the first PNG_CHK_BYTES bytes must be the signature */
+  if (fread(sig, 1, PNG_CHK_BYTES, fp) != PNG_CHK_BYTES)
+    return 0;
+
+  if (png_sig_cmp(sig, 0, PNG_CHK_BYTES) != 0)
+    return 0;
+
+  /* get PNG file info */
+  *png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (!*png_ptr)
+    return 0;
+
+  *info_ptr = png_create_info_struct(*png_ptr);
+  if (!*info_ptr) {
+    png_destroy_read_struct(png_ptr, NULL, NULL);
+    return 0;
+  }
+  /* libpng errors longjmp() back here; this context is only valid until this function returns */
+  if (setjmp(png_jmpbuf(*png_ptr))) {
+    DBG_INFO(80, L"error reading png!\n");
+    png_destroy_read_struct(png_ptr, info_ptr, NULL);
+    return 0;
+  }
+
+  png_init_io(*png_ptr, fp);
+  png_set_sig_bytes(*png_ptr, PNG_CHK_BYTES); /* tell libpng the signature was already consumed */
+  png_read_info(*png_ptr, *info_ptr);
+
+  return 1;
+}
+
+uint8*
+TxImage::readPNG(FILE* fp, int* width, int* height, uint16* format)
+{
+  /* Returns a malloc()ed BGRA image (format GR_TEXFMT_ARGB_8888) that the
+   * caller must free(); returns NULL with all outputs zeroed on failure. */
+
+  png_structp png_ptr;
+  png_infop info_ptr;
+  uint8 *image = NULL;
+  png_uint_32 o_width = 0, o_height = 0; /* exact types png_get_IHDR writes */
+  int bit_depth, color_type, interlace_type, compression_type, filter_type,
+      row_bytes, num_pas;
+
+  /* initialize */
+  *width  = 0;
+  *height = 0;
+  *format = 0;
+
+  /* check if we have a valid png file */
+  if (!fp)
+    return NULL;
+
+  if (!getPNGInfo(fp, &png_ptr, &info_ptr)) {
+    INFO(80, L"error reading png file! png image is corrupt.\n");
+    return NULL;
+  }
+
+  /* The jump context saved inside getPNGInfo() became invalid when that
+   * function returned; re-arm it here. No pixel buffer exists yet. */
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    DBG_INFO(80, L"error reading png!\n");
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    return NULL;
+  }
+
+  png_get_IHDR(png_ptr, info_ptr, &o_width, &o_height, &bit_depth, &color_type,
+               &interlace_type, &compression_type, &filter_type);
+
+  DBG_INFO(80, L"png format %d x %d bitdepth:%d color:%x interlace:%x compression:%x filter:%x\n",
+           (int)o_width, (int)o_height, bit_depth, color_type,
+           interlace_type, compression_type, filter_type);
+
+  /* transformations (Rice hi-res textures: _all.png, _rgb.png, _a.png, _ciByRGBA.png, _allciByRGBA.png) */
+
+  /* strip if color channel is larger than 8 bits */
+  if (bit_depth > 8) {
+    png_set_strip_16(png_ptr);
+    bit_depth = 8;
+  }
+
+#if 1
+  /* These are not really required per the Rice format spec,
+   * but are done just in case someone uses them. */
+  /* convert palette color to rgb color */
+  if (color_type == PNG_COLOR_TYPE_PALETTE) {
+    png_set_palette_to_rgb(png_ptr);
+    color_type = PNG_COLOR_TYPE_RGB;
+  }
+
+  /* expand 1,2,4 bit gray scale to 8 bit gray scale */
+  if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
+    png_set_expand_gray_1_2_4_to_8(png_ptr);
+
+  /* convert gray scale or gray scale + alpha to rgb color */
+  if (color_type == PNG_COLOR_TYPE_GRAY ||
+      color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
+    png_set_gray_to_rgb(png_ptr);
+    color_type = PNG_COLOR_TYPE_RGB;
+  }
+#endif
+
+  /* add alpha channel if any */
+  if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) {
+    png_set_tRNS_to_alpha(png_ptr);
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+  }
+
+  /* convert rgb to rgba */
+  if (color_type == PNG_COLOR_TYPE_RGB) {
+    png_set_filler(png_ptr, 0xff, PNG_FILLER_AFTER);
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+  }
+
+  /* punt invalid formats */
+  if (color_type != PNG_COLOR_TYPE_RGB_ALPHA) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    DBG_INFO(80, L"Error: not PNG_COLOR_TYPE_RGB_ALPHA format!\n");
+    return NULL;
+  }
+
+  /* convert rgba to bgra */
+  png_set_bgr(png_ptr);
+
+  /* turn on interlace handling to cope with the weirdness
+   * of texture authors using interlaced format */
+  num_pas = png_set_interlace_handling(png_ptr);
+
+  /* update info structure */
+  png_read_update_info(png_ptr, info_ptr);
+
+  /* we only get here if ARGB8888 */
+  row_bytes = png_get_rowbytes(png_ptr, info_ptr);
+
+  /* allocate memory; skip empty images and byte counts that overflow int */
+  if (o_height > 0 && row_bytes > 0 &&
+      o_height <= 0x7fffffffUL / (png_uint_32)row_bytes)
+    image = (uint8*)malloc((size_t)row_bytes * o_height);
+
+  /* read in image */
+  if (image) {
+    int pas;
+    png_uint_32 i;
+    uint8* tmpimage;
+
+    /* re-arm now that image is final so the error path can free it too */
+    if (setjmp(png_jmpbuf(png_ptr))) {
+      DBG_INFO(80, L"error reading png!\n");
+      free(image);
+      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+      return NULL;
+    }
+
+    for (pas = 0; pas < num_pas; pas++) { /* deal with interlacing */
+      tmpimage = image;
+      for (i = 0; i < o_height; i++) {
+        /* copy row */
+        png_read_rows(png_ptr, &tmpimage, NULL, 1);
+        tmpimage += row_bytes;
+      }
+    }
+
+    /* read rest of the info structure */
+    png_read_end(png_ptr, info_ptr);
+
+    *width = (row_bytes >> 2);
+    *height = (int)o_height;
+    *format = GR_TEXFMT_ARGB_8888;
+
+#if POW2_TEXTURES
+    /* next power of 2 size conversions */
+    /* NOTE: I can do this in the above loop for faster operations, but some
+     * texture packs require a workaround. see HACKALERT in nextPow2(). */
+    TxReSample *txReSample = new TxReSample; // XXX: temporary. move to a better place.
+#if (POW2_TEXTURES == 2)
+    if (!txReSample->nextPow2(&image, width, height, 32, 1)) {
+#else
+    if (!txReSample->nextPow2(&image, width, height, 32, 0)) {
+#endif
+      free(image);
+      image = NULL;
+      *width = 0;
+      *height = 0;
+      *format = 0;
+    }
+    delete txReSample;
+#endif /* POW2_TEXTURES */
+  }
+
+  /* clean up */
+  png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+
+#ifdef DEBUG
+  if (!image) {
+    DBG_INFO(80, L"Error: failed to load png image!\n");
+  }
+#endif
+
+  return image;
+}
+
+boolean
+TxImage::writePNG(uint8* src, FILE* fp, int width, int height, int rowStride, uint16 format, uint8 *palette)
+{
+  /* Writes height rows of src (rowStride bytes apart) to fp as a PNG whose
+   * layout is chosen from the N64 format code.  Returns 1 on success and 0
+   * on bad arguments, unsupported formats or any libpng error.
+   * NOTE: palette is not consumed yet (see TODO below), so the colour
+   * indexed formats currently fail instead of handing libpng wild pointers. */
+  png_structp png_ptr;
+  png_infop info_ptr;
+  png_color_8 sig_bit = { 0 };   /* alpha stays 0 for the RGB-only formats */
+  png_colorp palette_ptr = NULL; /* TODO: build the RGB entries from palette */
+  png_bytep trans_ptr = NULL;    /* TODO: build the alpha entries from palette */
+  int bit_depth, color_type, row_bytes, num_palette = 0;
+  int i;
+
+  if (!src || !fp)
+    return 0;
+
+  png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (png_ptr == NULL)
+    return 0;
+
+  info_ptr = png_create_info_struct(png_ptr);
+  if (info_ptr == NULL) {
+    png_destroy_write_struct(&png_ptr, NULL);
+    return 0;
+  }
+
+  /* libpng reports errors via longjmp(); land here so failures return 0 */
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+    return 0;
+  }
+
+  png_init_io(png_ptr, fp);
+
+  /* TODO: images must be converted to RGBA8888 or CI8,
+   * palettes need to be separated to A and RGB. */
+
+  /* N64 formats
+   * Format: 0 - RGBA, 1 - YUV, 2 - CI, 3 - IA, 4 - I
+   * Size:   0 - 4bit, 1 - 8bit, 2 - 16bit, 3 - 32 bit
+   * format = (Format << 8 | Size);
+   */
+
+  /* each channel is saved in 8bits for consistency */
+  switch (format) {
+  case 0x0002:/* RGBA5551 */
+    bit_depth = 8;
+    sig_bit.red   = 5;
+    sig_bit.green = 5;
+    sig_bit.blue  = 5;
+    sig_bit.alpha = 1;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0003:/* RGBA8888 */
+  case 0x0302:/* IA88 */
+    bit_depth = 8;
+    sig_bit.red   = 8;
+    sig_bit.green = 8;
+    sig_bit.blue  = 8;
+    sig_bit.alpha = 8;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0300:/* IA31 */
+    bit_depth = 8;
+    sig_bit.red   = 3;
+    sig_bit.green = 3;
+    sig_bit.blue  = 3;
+    sig_bit.alpha = 1;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0301:/* IA44 */
+    bit_depth = 8;
+    sig_bit.red   = 4;
+    sig_bit.green = 4;
+    sig_bit.blue  = 4;
+    sig_bit.alpha = 4;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0400:/* I4 */
+    bit_depth = 8;
+    sig_bit.red   = 4;
+    sig_bit.green = 4;
+    sig_bit.blue  = 4;
+    color_type = PNG_COLOR_TYPE_RGB;
+    break;
+  case 0x0401:/* I8 */
+  case 0x0402:/* I16 */
+    bit_depth = 8;
+    sig_bit.red   = 8;
+    sig_bit.green = 8;
+    sig_bit.blue  = 8;
+    color_type = PNG_COLOR_TYPE_RGB;
+    break;
+  case 0x0200:/* CI4 */
+    bit_depth = 8;
+    num_palette = 16;
+    color_type = PNG_COLOR_TYPE_PALETTE;
+    break;
+  case 0x0201:/* CI8 */
+    bit_depth = 8;
+    num_palette = 256;
+    color_type = PNG_COLOR_TYPE_PALETTE;
+    break;
+  case 0x0102:/* YUV ? */
+  case 0x0103:
+  default:
+    /* unsupported format */
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+    return 0;
+  }
+
+  /* rows are rowStride bytes apart in src */
+  row_bytes = rowStride;
+  switch (color_type) {
+  case PNG_COLOR_TYPE_RGB_ALPHA:
+  case PNG_COLOR_TYPE_RGB:
+    png_set_bgr(png_ptr);
+    png_set_sBIT(png_ptr, info_ptr, &sig_bit);
+    break;
+  case PNG_COLOR_TYPE_PALETTE:
+    if (!palette_ptr || !trans_ptr) {
+      /* no converted palette is available yet: refuse rather than crash */
+      png_destroy_write_struct(&png_ptr, &info_ptr);
+      return 0;
+    }
+    png_set_PLTE(png_ptr, info_ptr, palette_ptr, num_palette);
+    png_set_tRNS(png_ptr, info_ptr, trans_ptr, num_palette, 0);
+    break;
+  default:
+    break;
+  }
+
+  /* declare the image layout, then emit header, rows and trailer */
+  png_set_IHDR(png_ptr, info_ptr, width, height,
+               bit_depth, color_type, PNG_INTERLACE_NONE,
+               PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+  png_write_info(png_ptr, info_ptr);
+  for (i = 0; i < height; i++) {
+    png_write_row(png_ptr, (png_bytep)src);
+    src += row_bytes;
+  }
+  png_write_end(png_ptr, info_ptr);
+
+  png_destroy_write_struct(&png_ptr, &info_ptr);
+
+  return 1;
+}
+
+boolean
+TxImage::getBMPInfo(FILE* fp, BITMAPFILEHEADER* bmp_fhdr, BITMAPINFOHEADER* bmp_ihdr)
+{
+  /* Parse the 14 byte file header and 40 byte info header field by field;
+   * returns 1 on success, 0 on a short read or a non-BMP header.  Both
+   * structs are zeroed first: each field is filled by a 2 or 4 byte fread(),
+   * so where long is 8 bytes the upper half would otherwise be garbage. */
+  memset(bmp_fhdr, 0, sizeof(*bmp_fhdr));
+  memset(bmp_ihdr, 0, sizeof(*bmp_ihdr));
+  /* read in BITMAPFILEHEADER: is this a BMP file? */
+  if (fread(&bmp_fhdr->bfType, 2, 1, fp) != 1)
+    return 0;
+
+  if (memcmp(&bmp_fhdr->bfType, "BM", 2) != 0)
+    return 0;
+
+  /* get file size */
+  if (fread(&bmp_fhdr->bfSize, 4, 1, fp) != 1)
+    return 0;
+
+  /* reserved 1 */
+  if (fread(&bmp_fhdr->bfReserved1, 2, 1, fp) != 1)
+    return 0;
+
+  /* reserved 2 */
+  if (fread(&bmp_fhdr->bfReserved2, 2, 1, fp) != 1)
+    return 0;
+
+  /* offset to the image data */
+  if (fread(&bmp_fhdr->bfOffBits, 4, 1, fp) != 1)
+    return 0;
+
+  /* read in BITMAPINFOHEADER */
+
+  /* size of BITMAPINFOHEADER */
+  if (fread(&bmp_ihdr->biSize, 4, 1, fp) != 1)
+    return 0;
+
+  /* is this a Windows BMP? */
+  if (bmp_ihdr->biSize != 40)
+    return 0;
+
+  /* width of the bitmap in pixels */
+  if (fread(&bmp_ihdr->biWidth, 4, 1, fp) != 1)
+    return 0;
+
+  /* height of the bitmap in pixels */
+  if (fread(&bmp_ihdr->biHeight, 4, 1, fp) != 1)
+    return 0;
+
+  /* number of planes (always 1) */
+  if (fread(&bmp_ihdr->biPlanes, 2, 1, fp) != 1)
+    return 0;
+
+  /* number of bits-per-pixel. (1, 4, 8, 16, 24, 32) */
+  if (fread(&bmp_ihdr->biBitCount, 2, 1, fp) != 1)
+    return 0;
+
+  /* compression for a compressed bottom-up bitmap
+   *   0 : uncompressed format
+   *   1 : run-length encoded 4 bpp format
+   *   2 : run-length encoded 8 bpp format
+   *   3 : bitfield
+   */
+  if (fread(&bmp_ihdr->biCompression, 4, 1, fp) != 1)
+    return 0;
+
+  /* size of the image in bytes */
+  if (fread(&bmp_ihdr->biSizeImage, 4, 1, fp) != 1)
+    return 0;
+
+  /* horizontal resolution in pixels-per-meter */
+  if (fread(&bmp_ihdr->biXPelsPerMeter, 4, 1, fp) != 1)
+    return 0;
+
+  /* vertical resolution in pixels-per-meter */
+  if (fread(&bmp_ihdr->biYPelsPerMeter, 4, 1, fp) != 1)
+    return 0;
+
+  /* number of color indexes in the color table that are actually used */
+  if (fread(&bmp_ihdr->biClrUsed, 4, 1, fp) != 1)
+    return 0;
+
+  /*  the number of color indexes that are required for displaying */
+  if (fread(&bmp_ihdr->biClrImportant, 4, 1, fp) != 1)
+    return 0;
+
+  return 1;
+}
+
+uint8*
+TxImage::readBMP(FILE* fp, int* width, int* height, uint16* format)
+{
+  /* Load an uncompressed Windows BMP (4, 8, 24 or 32 bits per pixel) from fp.
+   * Returns a malloc()ed image, which the caller must free(), whose first
+   * row is the last row stored in the file:
+   *       4, 8bit palette bmp -> GR_TEXFMT_P_8 (one index per byte)
+   *       24, 32bit bmp -> GR_TEXFMT_ARGB_8888 (24bit gets alpha 0xFF)
+   * *width is the pixel count of one row of the returned buffer: for 4 and
+   * 8 bit files that includes the pixels covered by the file's 4 byte row
+   * padding, for 24 and 32 bit files it is biWidth.
+   * Returns NULL with *width, *height and *format set to 0 when the header
+   * is invalid, the format is unsupported, memory runs out or the pixel
+   * data cannot be read completely.
+   */
+
+  uint8 *image = NULL;
+  uint8 *image_row = NULL;
+  uint8 *tmpimage = NULL;
+  int row_bytes, out_row_bytes, pos, i, j;
+  int ok = 1;
+  /* Windows Bitmap */
+  BITMAPFILEHEADER bmp_fhdr;
+  BITMAPINFOHEADER bmp_ihdr;
+
+  /* initialize */
+  *width  = 0;
+  *height = 0;
+  *format = 0;
+
+  /* check if we have a valid bmp file */
+  if (!fp)
+    return NULL;
+
+  if (!getBMPInfo(fp, &bmp_fhdr, &bmp_ihdr)) {
+    INFO(80, L"error reading bitmap file! bitmap image is corrupt.\n");
+    return NULL;
+  }
+
+  DBG_INFO(80, L"bmp format %d x %d bitdepth:%d compression:%x offset:%d\n",
+           bmp_ihdr.biWidth, bmp_ihdr.biHeight, bmp_ihdr.biBitCount,
+           bmp_ihdr.biCompression, bmp_fhdr.bfOffBits);
+
+  /* Rice hi-res textures */
+  if (!(bmp_ihdr.biBitCount == 8 || bmp_ihdr.biBitCount == 4 || bmp_ihdr.biBitCount == 32 || bmp_ihdr.biBitCount == 24) ||
+      bmp_ihdr.biCompression != 0) {
+    DBG_INFO(80, L"Error: incompatible bitmap format!\n");
+    return NULL;
+  }
+
+  /* reject empty and negative-height bitmaps, and keep the pixel count
+   * small enough that every byte count computed below fits in an int */
+  if (bmp_ihdr.biWidth <= 0 || bmp_ihdr.biHeight <= 0 ||
+      bmp_ihdr.biWidth > 0x03ffffffL / bmp_ihdr.biHeight - 7) {
+    DBG_INFO(80, L"Error: incompatible bitmap format!\n");
+    return NULL;
+  }
+
+  /* rowStride in bytes: the row's bits rounded up to a 4 byte boundary */
+  row_bytes = (int)(((bmp_ihdr.biWidth * bmp_ihdr.biBitCount + 31) >> 5) << 2);
+
+  /* bytes per row of the returned image */
+  switch (bmp_ihdr.biBitCount) {
+  case 4:
+    out_row_bytes = row_bytes << 1; /* two indices per source byte */
+    break;
+  case 24:
+    out_row_bytes = (int)(bmp_ihdr.biWidth << 2); /* 3 bytes -> 4 bytes */
+    break;
+  default:
+    out_row_bytes = row_bytes; /* 8 and 32 bit rows are copied verbatim */
+  }
+
+  image = (uint8*)malloc((size_t)out_row_bytes * bmp_ihdr.biHeight);
+  image_row = (uint8*)malloc(row_bytes);
+  if (image && image_row) {
+    tmpimage = image;
+    /* start at the last row stored in the file and walk backwards */
+    pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1);
+    for (i = 0; i < bmp_ihdr.biHeight; i++) {
+      /* read in one row; a short file must not leave garbage in the image */
+      if (fseek(fp, pos, SEEK_SET) != 0 ||
+          fread(image_row, row_bytes, 1, fp) != 1) {
+        ok = 0;
+        break;
+      }
+      switch (bmp_ihdr.biBitCount) {
+      case 4:
+        /* expand 4bpp to 8bpp. stuff 4bit values into 8bit comps. */
+        for (j = 0; j < row_bytes; j++) {
+          tmpimage[j << 1] = image_row[j] & 0x0f;
+          tmpimage[(j << 1) + 1] = (image_row[j] & 0xf0) >> 4;
+        }
+        break;
+      case 24:
+        /* convert 24bpp to 32bpp. */
+        for (j = 0; j < bmp_ihdr.biWidth; j++) {
+          tmpimage[(j << 2)]     = image_row[j * 3];
+          tmpimage[(j << 2) + 1] = image_row[j * 3 + 1];
+          tmpimage[(j << 2) + 2] = image_row[j * 3 + 2];
+          tmpimage[(j << 2) + 3] = 0xFF;
+        }
+        break;
+      default:
+        /* 8 bit, 32 bit bitmap */
+        memcpy(tmpimage, image_row, row_bytes);
+      }
+      tmpimage += out_row_bytes;
+      pos -= row_bytes;
+    }
+  } else {
+    ok = 0;
+  }
+  free(image_row);
+  if (!ok) {
+    free(image);
+    image = NULL;
+  }
+
+  if (image) {
+    *height = bmp_ihdr.biHeight;
+    switch (bmp_ihdr.biBitCount) {
+    case 8:
+    case 4:
+      *width = out_row_bytes; /* one byte per pixel */
+      *format = GR_TEXFMT_P_8;
+      break;
+    case 32:
+    case 24:
+      *width = out_row_bytes >> 2; /* four bytes per pixel */
+      *format = GR_TEXFMT_ARGB_8888;
+    }
+
+#if POW2_TEXTURES
+    /* next power of 2 size conversions */
+    /* NOTE: I can do this in the above loop for faster operations, but some
+     * texture packs require a workaround. see HACKALERT in nextPow2().
+     */
+    TxReSample *txReSample = new TxReSample; // XXX: temporary. move to a better place.
+
+#if (POW2_TEXTURES == 2)
+    if (!txReSample->nextPow2(&image, width, height, 8, 1)) {
+#else
+    if (!txReSample->nextPow2(&image, width, height, 8, 0)) {
+#endif
+      if (image) {
+        free(image);
+        image = NULL;
+      }
+      *width = 0;
+      *height = 0;
+      *format = 0;
+    }
+
+    delete txReSample;
+
+#endif /* POW2_TEXTURES */
+  }
+
+#ifdef DEBUG
+  if (!image) {
+    DBG_INFO(80, L"Error: failed to load bmp image!\n");
+  }
+#endif
+
+  return image;
+}
+
+boolean
+TxImage::getDDSInfo(FILE *fp, DDSFILEHEADER *dds_fhdr)
+{
+  /* Read the DDS header field by field; returns 1 on success, 0 on a short
+   * read or a wrong magic.  The struct is zeroed first: its members are
+   * unsigned long, so where that is 8 bytes the 4 byte reads leave garbage. */
+  memset(dds_fhdr, 0, sizeof(*dds_fhdr));
+  /* is this a DDS file? */
+  if (fread(&dds_fhdr->dwMagic, 4, 1, fp) != 1)
+    return 0;
+
+  if (memcmp(&dds_fhdr->dwMagic, "DDS ", 4) != 0)
+    return 0;
+
+  if (fread(&dds_fhdr->dwSize, 4, 1, fp) != 1)
+    return 0;
+
+  /* get file flags */
+  if (fread(&dds_fhdr->dwFlags, 4, 1, fp) != 1)
+    return 0;
+
+  /* height of dds in pixels */
+  if (fread(&dds_fhdr->dwHeight, 4, 1, fp) != 1)
+    return 0;
+
+  /* width of dds in pixels */
+  if (fread(&dds_fhdr->dwWidth, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwLinearSize, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwDepth, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwMipMapCount, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwReserved1, 4 * 11, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwSize, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwFlags, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwFourCC, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwRGBBitCount, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwRBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwGBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwBBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwRGBAlphaBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwCaps1, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwCaps2, 4, 1, fp) != 1)
+    return 0;
+
+  return 1;
+}
+
+uint8*
+TxImage::readDDS(FILE* fp, int* width, int* height, uint16* format)
+{
+  /* Returns the compressed DXT1/3/5 blocks of a single-level DDS in a malloc()ed
+   * buffer (caller frees); NULL with all outputs zeroed on any failure. */
+  uint8 *image = NULL;
+  DDSFILEHEADER dds_fhdr;
+  uint16 tmpformat = 0;
+  unsigned long blocks; /* number of 4x4 texel blocks */
+
+  /* initialize */
+  *width  = 0;
+  *height = 0;
+  *format = 0;
+
+  /* check if we have a valid dds file */
+  if (!fp)
+    return NULL;
+
+  if (!getDDSInfo(fp, &dds_fhdr)) {
+    INFO(80, L"error reading dds file! dds image is corrupt.\n");
+    return NULL;
+  }
+  DBG_INFO(80, L"dds format %d x %d HeaderSize %d LinearSize %d\n",
+           dds_fhdr.dwWidth, dds_fhdr.dwHeight, dds_fhdr.dwSize, dds_fhdr.dwLinearSize);
+  if (!(dds_fhdr.dwFlags & (DDSD_CAPS|DDSD_WIDTH|DDSD_HEIGHT|DDSD_PIXELFORMAT|DDSD_LINEARSIZE))) {
+    DBG_INFO(80, L"Error: incompatible dds format!\n");
+    return NULL;
+  }
+  if ((dds_fhdr.dwFlags & DDSD_MIPMAPCOUNT) && dds_fhdr.dwMipMapCount != 1) {
+    DBG_INFO(80, L"Error: mipmapped dds not supported!\n");
+    return NULL;
+  }
+  if (!((dds_fhdr.ddpf.dwFlags & DDPF_FOURCC) && dds_fhdr.dwCaps2 == 0)) {
+    DBG_INFO(80, L"Error: not fourcc standard texture!\n");
+    return NULL;
+  }
+
+  /* sizes must be non-zero, fit the int outputs and not overflow below */
+  if (dds_fhdr.dwWidth == 0 || dds_fhdr.dwHeight == 0 || dds_fhdr.dwWidth > 0x7fffffffUL ||
+      dds_fhdr.dwHeight > 0x7fffffffUL || ((dds_fhdr.dwWidth + 3) >> 2) > 0x07ffffffUL / ((dds_fhdr.dwHeight + 3) >> 2))
+    return NULL;
+  blocks = ((dds_fhdr.dwWidth + 3) >> 2) * ((dds_fhdr.dwHeight + 3) >> 2); /* partial blocks are stored whole */
+  if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT1", 4) == 0) {
+    DBG_INFO(80, L"DXT1 format\n");
+    tmpformat = GR_TEXFMT_ARGB_CMP_DXT1;
+  } else if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT3", 4) == 0) {
+    DBG_INFO(80, L"DXT3 format\n");
+    tmpformat = GR_TEXFMT_ARGB_CMP_DXT3;
+  } else if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT5", 4) == 0) {
+    DBG_INFO(80, L"DXT5 format\n");
+    tmpformat = GR_TEXFMT_ARGB_CMP_DXT5;
+  } else {
+    DBG_INFO(80, L"Error: not DXT1 or DXT3 or DXT5 format!\n");
+    return NULL;
+  }
+  /* header LinearSize is not trusted: DXT1 blocks take 8 bytes, DXT3/5 take 16 */
+  dds_fhdr.dwLinearSize = blocks * ((tmpformat == GR_TEXFMT_ARGB_CMP_DXT1) ? 8 : 16);
+  /* read in image */
+  image = (uint8*)malloc(dds_fhdr.dwLinearSize);
+  if (!image || fseek(fp, 128, SEEK_SET) != 0 || /* size of header is 128 bytes */
+      fread(image, dds_fhdr.dwLinearSize, 1, fp) != 1) {
+    free(image);
+    return NULL;
+  }
+  *width  = dds_fhdr.dwWidth;
+  *height = dds_fhdr.dwHeight;
+  *format = tmpformat;
+  return image;
+}
diff --git a/Source/GlideHQ/TxImage.h b/Source/GlideHQ/TxImage.h
new file mode 100644
index 000000000..64331f537
--- /dev/null
+++ b/Source/GlideHQ/TxImage.h
@@ -0,0 +1,116 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXIMAGE_H__
+#define __TXIMAGE_H__
+
+#include <stdio.h>
+#include <png/png.h>
+#include "TxInternal.h"
+
+#ifndef WIN32 /* non-Windows builds declare the BMP header structs themselves */
+typedef struct tagBITMAPFILEHEADER { /* NOTE(review): getBMPInfo() fills the long fields with 4 byte reads; long is 8 bytes on LP64 */
+  unsigned short bfType;      /* compared against "BM" by getBMPInfo() */
+  unsigned long  bfSize;      /* file size */
+  unsigned short bfReserved1;
+  unsigned short bfReserved2;
+  unsigned long  bfOffBits;   /* offset to the image data */
+} BITMAPFILEHEADER;
+
+typedef struct tagBITMAPINFOHEADER {
+  unsigned long  biSize;          /* getBMPInfo() requires 40 */
+  long           biWidth;         /* width of the bitmap in pixels */
+  long           biHeight;        /* height of the bitmap in pixels */
+  unsigned short biPlanes;        /* number of planes */
+  unsigned short biBitCount;      /* bits per pixel; readBMP() accepts 4, 8, 24, 32 */
+  unsigned long  biCompression;   /* readBMP() accepts only 0 (uncompressed) */
+  unsigned long  biSizeImage;     /* size of the image in bytes */
+  long           biXPelsPerMeter; /* horizontal resolution */
+  long           biYPelsPerMeter; /* vertical resolution */
+  unsigned long  biClrUsed;       /* color table entries actually used */
+  unsigned long  biClrImportant;  /* color table entries required for displaying */
+} BITMAPINFOHEADER;
+#else /* presumably completed by the Windows platform headers -- TODO confirm */
+typedef struct tagBITMAPFILEHEADER BITMAPFILEHEADER;
+typedef struct tagBITMAPINFOHEADER BITMAPINFOHEADER;
+#endif
+
+#define DDSD_CAPS	0x00000001
+#define DDSD_HEIGHT	0x00000002
+#define DDSD_WIDTH	0x00000004
+#define DDSD_PITCH	0x00000008
+#define DDSD_PIXELFORMAT	0x00001000
+#define DDSD_MIPMAPCOUNT	0x00020000
+#define DDSD_LINEARSIZE	0x00080000
+#define DDSD_DEPTH	0x00800000
+
+#define DDPF_ALPHAPIXELS	0x00000001
+#define DDPF_FOURCC	0x00000004
+#define DDPF_RGB	0x00000040
+
+#define DDSCAPS_COMPLEX	0x00000008
+#define DDSCAPS_TEXTURE	0x00001000
+#define DDSCAPS_MIPMAP	0x00400000
+
+typedef struct tagDDSPIXELFORMAT { /* pixel format part of the DDS header, filled by getDDSInfo() */
+  unsigned long dwSize;
+  unsigned long dwFlags;       /* readDDS() requires DDPF_FOURCC to be set */
+  unsigned long dwFourCC;      /* readDDS() accepts "DXT1", "DXT3" and "DXT5" */
+  unsigned long dwRGBBitCount; /* read but not used by readDDS() */
+  unsigned long dwRBitMask;    /* read but not used by readDDS() */
+  unsigned long dwGBitMask;    /* read but not used by readDDS() */
+  unsigned long dwBBitMask;    /* read but not used by readDDS() */
+  unsigned long dwRGBAlphaBitMask; /* read but not used by readDDS() */
+} DDSPIXELFORMAT;
+
+typedef struct tagDDSFILEHEADER { /* NOTE(review): getDDSInfo() fills each field with a 4 byte read; long is 8 bytes on LP64 */
+  unsigned long dwMagic;       /* compared against "DDS " by getDDSInfo() */
+  unsigned long dwSize;
+  unsigned long dwFlags;       /* DDSD_* bits */
+  unsigned long dwHeight;      /* height of dds in pixels */
+  unsigned long dwWidth;       /* width of dds in pixels */
+  unsigned long dwLinearSize;  /* overwritten by readDDS() with a size computed from the dimensions */
+  unsigned long dwDepth;
+  unsigned long dwMipMapCount; /* readDDS() rejects values other than 1 when DDSD_MIPMAPCOUNT is set */
+  unsigned long dwReserved1[11]; /* getDDSInfo() reads 4 * 11 bytes into this array */
+  DDSPIXELFORMAT ddpf;
+  unsigned long dwCaps1;
+  unsigned long dwCaps2;       /* readDDS() requires 0 */
+} DDSFILEHEADER;
+
+class TxImage /* PNG / BMP / DDS file readers plus a PNG writer; holds no state */
+{
+private:
+  boolean getPNGInfo(FILE *fp, png_structp *png_ptr, png_infop *info_ptr); /* checks the signature, creates the libpng structs, reads the header */
+  boolean getBMPInfo(FILE *fp, BITMAPFILEHEADER *bmp_fhdr, BITMAPINFOHEADER *bmp_ihdr); /* reads both BMP headers */
+  boolean getDDSInfo(FILE *fp, DDSFILEHEADER *dds_fhdr); /* reads the DDS header */
+public:
+  TxImage() {}
+  ~TxImage() {}
+  uint8* readPNG(FILE* fp, int* width, int* height, uint16* format); /* malloc()ed GR_TEXFMT_ARGB_8888 image or NULL */
+  boolean writePNG(uint8* src, FILE* fp, int width, int height, int rowStride, uint16 format, uint8 *palette); /* 1 on success, 0 on failure */
+  uint8* readBMP(FILE* fp, int* width, int* height, uint16* format); /* malloc()ed GR_TEXFMT_P_8 or GR_TEXFMT_ARGB_8888 image or NULL */
+  uint8* readDDS(FILE* fp, int* width, int* height, uint16* format); /* malloc()ed DXT1/3/5 data or NULL */
+};
+
+#endif /* __TXIMAGE_H__ */
diff --git a/Source/GlideHQ/TxInternal.h b/Source/GlideHQ/TxInternal.h
new file mode 100644
index 000000000..3f0be6d9d
--- /dev/null
+++ b/Source/GlideHQ/TxInternal.h
@@ -0,0 +1,100 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __INTERNAL_H__
+#define __INTERNAL_H__
+
+#include "Ext_TxFilter.h"
+
+/* dll exports */
+#ifdef TXFILTER_DLL
+#define TAPI __declspec(dllexport)
+#define TAPIENTRY
+#else
+#define TAPI
+#define TAPIENTRY
+#endif
+
+typedef unsigned char  uint8;  /* byte */
+typedef unsigned short uint16; /* presumably expected to be 16 bits wide -- TODO confirm on each target */
+typedef unsigned long  uint32; /* NOTE(review): unsigned long is 64 bits wide on LP64 targets -- confirm users of uint32 tolerate that */
+
+#ifdef WIN32
+#define KBHIT(key) ((GetAsyncKeyState(key) & 0x8001) == 0x8001)
+#else
+#define KBHIT(key) (0)
+#endif
+
+/* from OpenGL glext.h */
+#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT   0x83F0
+#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT  0x83F1
+#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT  0x83F2
+#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT  0x83F3
+
+/* for explicit fxt1 compression */
+#define CC_CHROMA 0x0
+#define CC_HI     0x1
+#define CC_ALPHA  0x2
+
+/* in-memory zlib texture compression */
+#define GR_TEXFMT_GZ                 0x8000
+
+#if 0 /* this is here to remind me of other formats */
+/* from 3Dfx Interactive Inc. glide.h */
+#define GR_TEXFMT_8BIT                  0x0
+#define GR_TEXFMT_RGB_332               GR_TEXFMT_8BIT
+#define GR_TEXFMT_YIQ_422               0x1
+#define GR_TEXFMT_ALPHA_8               0x2 /* (0..0xFF) alpha     */
+#define GR_TEXFMT_INTENSITY_8           0x3 /* (0..0xFF) intensity */
+#define GR_TEXFMT_ALPHA_INTENSITY_44    0x4
+#define GR_TEXFMT_P_8                   0x5 /* 8-bit palette */
+#define GR_TEXFMT_RSVD0                 0x6 /* GR_TEXFMT_P_8_RGBA */
+#define GR_TEXFMT_P_8_6666              GR_TEXFMT_RSVD0
+#define GR_TEXFMT_P_8_6666_EXT          GR_TEXFMT_RSVD0
+#define GR_TEXFMT_RSVD1                 0x7
+#define GR_TEXFMT_16BIT                 0x8
+#define GR_TEXFMT_ARGB_8332             GR_TEXFMT_16BIT
+#define GR_TEXFMT_AYIQ_8422             0x9
+#define GR_TEXFMT_RGB_565               0xa
+#define GR_TEXFMT_ARGB_1555             0xb
+#define GR_TEXFMT_ARGB_4444             0xc
+#define GR_TEXFMT_ALPHA_INTENSITY_88    0xd
+#define GR_TEXFMT_AP_88                 0xe /* 8-bit alpha 8-bit palette */
+#define GR_TEXFMT_RSVD2                 0xf
+#define GR_TEXFMT_RSVD4                 GR_TEXFMT_RSVD2
+
+/* from 3Dfx Interactive Inc. g3ext.h */
+#define GR_TEXFMT_ARGB_CMP_FXT1        0x11
+#define GR_TEXFMT_ARGB_8888            0x12
+#define GR_TEXFMT_YUYV_422             0x13
+#define GR_TEXFMT_UYVY_422             0x14
+#define GR_TEXFMT_AYUV_444             0x15
+#define GR_TEXFMT_ARGB_CMP_DXT1        0x16
+#define GR_TEXFMT_ARGB_CMP_DXT2        0x17
+#define GR_TEXFMT_ARGB_CMP_DXT3        0x18
+#define GR_TEXFMT_ARGB_CMP_DXT4        0x19
+#define GR_TEXFMT_ARGB_CMP_DXT5        0x1A
+#define GR_TEXTFMT_RGB_888             0xFF
+#endif
+
+#endif /* __INTERNAL_H__ */
diff --git a/Source/GlideHQ/TxQuantize.cpp b/Source/GlideHQ/TxQuantize.cpp
new file mode 100644
index 000000000..09100f4ef
--- /dev/null
+++ b/Source/GlideHQ/TxQuantize.cpp
@@ -0,0 +1,2405 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef WIN32
+#pragma warning(disable: 4786)
+#endif
+
+/* NOTE: This code is not optimized; it can be made faster. */
+
+#include "TxQuantize.h"
+
+TxQuantize::TxQuantize()
+{ /* Sets up the members used by the quantizer: a TxUtil helper, the core count and two compress function hooks. */
+  _txUtil = new TxUtil(); /* heap-allocated here; deleted in ~TxQuantize() */
+
+  /* ask the TxUtil helper for the number of CPU cores */
+  _numcore = _txUtil->getNumberofProcessors();
+
+  /* fetch the fxt1 and dxtn compress function hooks from the TxLoadLib instance */
+  _tx_compress_fxt1 = TxLoadLib::getInstance()->getfxtCompressTexFuncExt();
+  _tx_compress_dxtn = TxLoadLib::getInstance()->getdxtCompressTexFuncExt();
+}
+
+
+TxQuantize::~TxQuantize()
+{
+  delete _txUtil; /* releases the TxUtil allocated in the constructor */
+}
+
+void
+TxQuantize::ARGB1555_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{ /* Expands ARGB1555 texels to ARGB8888: every src word carries two 16-bit texels and produces two dest words. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* src words to read (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (((*src & 0x00008000) ? 0xff000000 : 0x00000000) | /* texel in bits 15..0: alpha bit becomes 0xff or 0x00 */
+            ((*src & 0x00007c00) << 9) | ((*src & 0x00007000) << 4) | /* red: 5 bits, then its top 3 bits again as the low bits */
+            ((*src & 0x000003e0) << 6) | ((*src & 0x00000380) << 1) | /* green, same scheme */
+            ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2)); /* blue, same scheme */
+    dest++;
+    *dest = (((*src & 0x80000000) ? 0xff000000 : 0x00000000) | /* texel in bits 31..16, expanded the same way */
+            ((*src & 0x7c000000) >>  7) | ((*src & 0x70000000) >> 12) |
+            ((*src & 0x03e00000) >> 10) | ((*src & 0x03800000) >> 15) |
+            ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18));
+    dest++;
+    src++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // arrr rrgg gggb bbbb
+    // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+    mov edx, eax;         // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+    mov ebx, 0x00000000;
+    and eax, 0x00008000;  // eax = 00000000 00000000 a0000000 00000000
+    jz  transparent1;
+    mov ebx, 0xff000000;  // ebx = aaaaaaaa 00000000 00000000 00000000
+
+  transparent1:
+    mov eax, edx;         // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+    and edx, 0x00007c00;  // edx = 00000000 00000000 0rrrrr00 00000000
+    shl edx, 4;           // edx = 00000000 00000rrr rr000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa 00000rrr rr000000 00000000
+    shl edx, 5;           // edx = 00000000 rrrrr000 00000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr rr000000 00000000
+    and ebx, 0xffff0000;  // ebx = aaaaaaaa rrrrrrrr 00000000 00000000
+    mov edx, eax;
+    and edx, 0x000003e0;  // edx = 00000000 00000000 000000gg ggg00000
+    shl edx, 1;           // edx = 00000000 00000000 00000ggg gg000000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000
+    shl edx, 5;           // edx = 00000000 00000000 ggggg000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000
+    and ebx, 0xffffff00;  // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000
+    mov edx, eax;
+    and edx, 0x0000001f;  // edx = 00000000 00000000 00000000 000bbbbb
+    shl edx, 3;           // edx = 00000000 00000000 00000000 bbbbb000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000
+    shr edx, 5;           // edx = 00000000 00000000 00000000 00000bbb
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], ebx;
+    add edi, 4;
+
+    shr eax, 16;          // eax = 00000000 00000000 arrrrrgg gggbbbbb
+    mov edx, eax;         // edx = 00000000 00000000 arrrrrgg gggbbbbb
+    mov ebx, 0x00000000;
+    and eax, 0x00008000;  // eax = 00000000 00000000 a0000000 00000000
+    jz  transparent2;
+    mov ebx, 0xff000000;  // ebx = aaaaaaaa 00000000 00000000 00000000
+
+  transparent2:
+    mov eax, edx;         // eax = 00000000 00000000 arrrrrgg gggbbbbb
+    and edx, 0x00007c00;  // edx = 00000000 00000000 0rrrrr00 00000000
+    shl edx, 4;           // edx = 00000000 00000rrr rr000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa 00000rrr rr000000 00000000
+    shl edx, 5;           // edx = 00000000 rrrrr000 00000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr rr000000 00000000
+    and ebx, 0xffff0000;  // ebx = aaaaaaaa rrrrrrrr 00000000 00000000
+    mov edx, eax;
+    and edx, 0x000003e0;  // edx = 00000000 00000000 000000gg ggg00000
+    shl edx, 1;           // edx = 00000000 00000000 00000ggg gg000000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr 00000ggg gg000000
+    shl edx, 5;           // edx = 00000000 00000000 ggggg000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr gggggggg gg000000
+    and ebx, 0xffffff00;  // ebx = aaaaaaaa rrrrrrrr gggggggg 00000000
+    mov edx, eax;
+    and edx, 0x0000001f;  // edx = 00000000 00000000 00000000 000bbbbb
+    shl edx, 3;           // edx = 00000000 00000000 00000000 bbbbb000
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbb000
+    shr edx, 5;           // edx = 00000000 00000000 00000000 00000bbb
+    or  ebx, edx;         // ebx = aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], ebx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::ARGB4444_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{ /* Expands ARGB4444 texels to ARGB8888: every src word carries two 16-bit texels and produces two dest words. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* src words to read (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = ((*src & 0x0000f000) << 12) | /* texel in bits 15..0: each 4-bit channel goes to the low nibble of its own byte */
+            ((*src & 0x00000f00) << 8) |
+            ((*src & 0x000000f0) << 4) |
+             (*src & 0x0000000f);
+    *dest |= (*dest << 4); /* copy every nibble into the upper half of its byte (0x0N -> 0xNN) */
+    dest++;
+    *dest = ((*src & 0xf0000000) | /* texel in bits 31..16: each channel goes to the high nibble of its own byte */
+            ((*src & 0x0f000000) >> 4) |
+            ((*src & 0x00f00000) >> 8) |
+            ((*src & 0x000f0000) >> 12));
+    *dest |= (*dest >> 4); /* copy every nibble into the lower half of its byte (0xN0 -> 0xNN) */
+    dest++;
+    src++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaa rrrr gggg bbbb
+    // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+    mov edx, eax;
+    and eax, 0x0000ffff;
+    mov ebx, eax;        // 00000000 00000000 aaaarrrr ggggbbbb
+    and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    shl ebx, 12;         // 0000aaaa 00000000 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 00000000 aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
+    shl ebx, 8;          // 00000000 0000rrrr 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 0000rrrr aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
+    shl ebx, 4;          // 00000000 00000000 0000gggg 00000000
+    and eax, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb
+    or  eax, ebx;        // 0000aaaa 0000rrrr 0000gggg 0000bbbb
+    mov ebx, eax;
+    shl ebx, 4;          // aaaa0000 rrrr0000 gggg0000 bbbb0000
+    or  eax, ebx;        // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    shr edx, 16;
+    mov ebx, edx;        // 00000000 00000000 aaaarrrr ggggbbbb
+    and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    shl ebx, 12;         // 0000aaaa 00000000 00000000 00000000
+    or  edx, ebx;        // 0000aaaa 00000000 aaaarrrr ggggbbbb
+    mov ebx, edx;
+    and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
+    shl ebx, 8;          // 00000000 0000rrrr 00000000 00000000
+    or  edx, ebx;        // 0000aaaa 0000rrrr aaaarrrr ggggbbbb
+    mov ebx, edx;
+    and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
+    shl ebx, 4;          // 00000000 00000000 0000gggg 00000000
+    and edx, 0x0f0f000f; // 0000aaaa 0000rrrr 00000000 0000bbbb
+    or  edx, ebx;        // 0000aaaa 0000rrrr 0000gggg 0000bbbb
+    mov ebx, edx;
+    shl ebx, 4;          // aaaa0000 rrrr0000 gggg0000 bbbb0000
+    or  edx, ebx;        // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::RGB565_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{ /* Expands RGB565 texels to ARGB8888 with alpha forced to 0xff: every src word carries two 16-bit texels. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* src words to read (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (0xff000000 | /* texel in bits 15..0; the source has no alpha, so the result is fully opaque */
+            ((*src & 0x0000f800) << 8) | ((*src & 0x0000e000) << 3) | /* red: 5 bits, then its top 3 bits again as the low bits */
+            ((*src & 0x000007e0) << 5) | ((*src & 0x00000600) >> 1) | /* green: 6 bits, then its top 2 bits again as the low bits */
+            ((*src & 0x0000001f) << 3) | ((*src & 0x0000001c) >> 2)); /* blue: 5 bits, then its top 3 bits again as the low bits */
+    dest++;
+    *dest = (0xff000000 | /* texel in bits 31..16, expanded the same way */
+            ((*src & 0xf8000000) >>  8) | ((*src & 0xe0000000) >> 13) |
+            ((*src & 0x07e00000) >> 11) | ((*src & 0x06000000) >> 17) |
+            ((*src & 0x001f0000) >> 13) | ((*src & 0x001c0000) >> 18));
+    dest++;
+    src++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // rrrr rggg gggb bbbb
+    // 11111111 rrrrrrrr gggggggg bbbbbbbb
+    mov edx, eax;
+    and eax, 0x0000ffff;
+    mov ebx, eax;        // 00000000 00000000 rrrrrggg gggbbbbb
+    and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000
+    shl ebx, 5;          // 00000000 000rrrrr 00000000 00000000
+    or  eax, ebx;        // 00000000 000rrrrr rrrrrggg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000
+    shl ebx, 5;          // 00000000 00000000 gggggg00 00000000
+    and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb
+    shl eax, 3;          // 00000000 rrrrr000 00000000 bbbbb000
+    or  eax, ebx;        // 00000000 rrrrr000 gggggg00 bbbbb000
+    mov ebx, eax;
+    shr ebx, 5;          // 00000000 00000rrr rr000ggg ggg00bbb
+    and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb
+    or  eax, ebx;        // 00000000 rrrrrrrr gggggg00 bbbbbbbb
+    mov ebx, eax;
+    shr ebx, 6;
+    and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000
+    or  eax, ebx         // 00000000 rrrrrrrr gggggggg bbbbbbbb
+    or  eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    shr edx, 16;
+    mov eax, edx;        // 00000000 00000000 rrrrrggg gggbbbbb
+    and eax, 0x0000ffff;
+    mov ebx, eax;        // 00000000 00000000 rrrrrggg gggbbbbb
+    and ebx, 0x0000f800; // 00000000 00000000 rrrrr000 00000000
+    shl ebx, 5;          // 00000000 000rrrrr 00000000 00000000
+    or  eax, ebx;        // 00000000 000rrrrr rrrrrggg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x000007e0; // 00000000 00000000 00000ggg ggg00000
+    shl ebx, 5;          // 00000000 00000000 gggggg00 00000000
+    and eax, 0x001F001F; // 00000000 000rrrrr 00000000 000bbbbb
+    shl eax, 3;          // 00000000 rrrrr000 00000000 bbbbb000
+    or  eax, ebx;        // 00000000 rrrrr000 gggggg00 bbbbb000
+    mov ebx, eax;
+    shr ebx, 5;          // 00000000 00000rrr rr000ggg ggg00bbb
+    and ebx, 0x00070007; // 00000000 00000rrr 00000000 00000bbb
+    or  eax, ebx;        // 00000000 rrrrrrrr gggggg00 bbbbbbbb
+    mov ebx, eax;
+    shr ebx, 6;
+    and ebx, 0x00000300; // 00000000 00000000 000000gg 00000000
+    or  eax, ebx         // 00000000 rrrrrrrr gggggggg bbbbbbbb
+    or  eax, 0xff000000; // 11111111 rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::A8_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{ /* Expands 8-bit texels to ARGB8888 by repeating the byte in all four channels: each src word carries four texels. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 2; /* src words to read (four texels each); up to three leftover texels are not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (*src & 0x000000ff); /* texel 0 (bits 7..0) */
+    *dest |= (*dest << 8); /* now in the two low bytes */
+    *dest |= (*dest << 16); /* now in all four bytes */
+    dest++;
+    *dest = (*src & 0x0000ff00); /* texel 1 (bits 15..8) */
+    *dest |= (*dest >> 8);
+    *dest |= (*dest << 16);
+    dest++;
+    *dest = (*src & 0x00ff0000); /* texel 2 (bits 23..16) */
+    *dest |= (*dest << 8);
+    *dest |= (*dest >> 16);
+    dest++;
+    *dest = (*src & 0xff000000); /* texel 3 (bits 31..24) */
+    *dest |= (*dest >> 8);
+    *dest |= (*dest >> 16);
+    dest++;
+    src++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 2;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaaaaa
+    // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+    mov edx, eax;
+    and eax, 0x000000ff;
+    mov ebx, eax;        // 00000000 00000000 00000000 aaaaaaaa
+    shl ebx, 8;          // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // 00000000 00000000 aaaaaaaa aaaaaaaa
+    mov ebx, eax;
+    shl ebx, 16;         // aaaaaaaa aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0x0000ff00;
+    mov ebx, eax;        // 00000000 00000000 aaaaaaaa 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 aaaaaaaa
+    or  eax, ebx;        // 00000000 00000000 aaaaaaaa aaaaaaaa
+    mov ebx, eax;
+    shl ebx, 16;         // aaaaaaaa aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0x00ff0000;
+    mov ebx, eax;        // 00000000 aaaaaaaa 00000000 00000000
+    shl ebx, 8;          // aaaaaaaa 00000000 00000000 00000000
+    or  eax, ebx;        // aaaaaaaa aaaaaaaa 00000000 00000000
+    mov ebx, eax;
+    shr ebx, 16;         // 00000000 00000000 aaaaaaaa aaaaaaaa
+    or  eax, ebx;        // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0xff000000;
+    mov ebx, eax;        // aaaaaaaa 00000000 00000000 00000000
+    shr ebx, 8;          // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // aaaaaaaa aaaaaaaa 00000000 00000000
+    mov ebx, eax;
+    shr ebx, 16;         // 00000000 00000000 aaaaaaaa aaaaaaaa
+    or  eax, ebx;        // aaaaaaaa rrrrrrrr gggggggg bbbbbbbb
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::AI44_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{ /* Expands AI44 texels (alpha in the high nibble, intensity in the low nibble) to ARGB8888: four texels per src word. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 2; /* src words to read (four texels each); up to three leftover texels are not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (*src & 0x0000000f); /* texel 0: intensity nibble */
+    *dest |= ((*dest << 8) | (*dest << 16)); /* same nibble into the low half of the three low bytes */
+    *dest |= ((*src & 0x000000f0) << 20); /* alpha nibble into the low half of the top byte */
+    *dest |= (*dest << 4); /* widen every nibble to a full byte (0x0N -> 0xNN) */
+    dest++;
+    *dest = (*src & 0x00000f00); /* texel 1: same steps, starting from bits 15..8 */
+    *dest |= ((*dest << 8) | (*dest >> 8));
+    *dest |= ((*src & 0x0000f000) << 12);
+    *dest |= (*dest << 4);
+    dest++;
+    *dest = (*src & 0x000f0000); /* texel 2: same steps, starting from bits 23..16 */
+    *dest |= ((*dest >> 8) | (*dest >> 16));
+    *dest |= ((*src & 0x00f00000) << 4);
+    *dest |= (*dest << 4);
+    dest++;
+    *dest = ((*src & 0x0f000000) >> 4); /* texel 3: nibbles are placed in the high halves instead ... */
+    *dest |= ((*dest >> 8) | (*dest >> 16));
+    *dest |= (*src & 0xf0000000);
+    *dest |= (*dest >> 4); /* ... so the widening step shifts right (0xN0 -> 0xNN) */
+    dest++;
+    src++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 2;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaiiii
+    // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+    mov edx, eax;
+    and eax, 0x000000f0; // 00000000 00000000 00000000 aaaa0000
+    mov ebx, edx;
+    shl eax, 20;         // 0000aaaa 00000000 00000000 00000000
+    and ebx, 0x0000000f; // 00000000 00000000 00000000 0000iiii
+    or  eax, ebx;        // 0000aaaa 00000000 00000000 0000iiii
+    shl ebx, 8;          // 00000000 00000000 0000iiii 00000000
+    or  eax, ebx;        // 0000aaaa 00000000 0000iiii 0000iiii
+    shl ebx, 8;          // 00000000 0000iiii 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 0000iiii 0000iiii 0000iiii
+    mov ebx, eax;
+    shl ebx, 4;          // aaaa0000 iiii0000 iiii0000 iiii0000
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    mov ebx, edx;
+    shl eax, 12;         // 0000aaaa 00000000 00000000 00000000
+    and ebx, 0x00000f00; // 00000000 00000000 0000iiii 00000000
+    or  eax, ebx;        // 0000aaaa 00000000 0000iiii 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 0000iiii
+    or  eax, ebx;        // 0000aaaa 00000000 0000iiii 0000iiii
+    shl ebx, 16;         // 00000000 0000iiii 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 0000iiii 0000iiii 0000iiii
+    mov ebx, eax;
+    shl ebx, 4;          // aaaa0000 iiii0000 iiii0000 iiii0000
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0x00f00000; // 00000000 aaaa0000 00000000 00000000
+    mov ebx, edx;
+    shl eax, 4;          // 0000aaaa 00000000 00000000 00000000
+    and ebx, 0x000f0000; // 00000000 0000iiii 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 0000iiii 00000000 00000000
+    shr ebx, 8;          // 00000000 00000000 0000iiii 00000000
+    or  eax, ebx;        // 0000aaaa 0000iiii 0000iiii 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 0000iiii
+    or  eax, ebx;        // 0000aaaa 0000iiii 0000iiii 0000iiii
+    mov ebx, eax;
+    shl ebx, 4;          // aaaa0000 iiii0000 iiii0000 iiii0000
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0xf0000000; // aaaa0000 00000000 00000000 00000000
+    mov ebx, edx;
+    and ebx, 0x0f000000; // 0000iiii 00000000 00000000 00000000
+    shr ebx, 4;          // 00000000 iiii0000 00000000 00000000
+    or  eax, ebx;        // aaaa0000 iiii0000 00000000 00000000
+    shr ebx, 8;          // 00000000 00000000 iiii0000 00000000
+    or  eax, ebx;        // aaaa0000 iiii0000 iiii0000 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 iiii0000
+    or  eax, ebx;        // aaaa0000 iiii0000 iiii0000 iiii0000
+    mov ebx, eax;
+    shr ebx, 4;          // 0000aaaa 0000iiii 0000iiii 0000iiii
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::AI88_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{ /* Expands AI88 texels (alpha in the high byte, intensity in the low byte) to ARGB8888: two texels per src word. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* src words to read (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (*src & 0x000000ff); /* texel in bits 15..0: intensity byte */
+    *dest |= ((*dest << 8) | (*dest << 16)); /* intensity repeated in the three low bytes */
+    *dest |= ((*src & 0x0000ff00) << 16); /* alpha byte moved to the top byte */
+    dest++;
+    *dest = (*src & 0x00ff0000); /* texel in bits 31..16: intensity byte */
+    *dest |= ((*dest >> 8) | (*dest >> 16));
+    *dest |= (*src & 0xff000000); /* alpha byte is already in the top byte */
+    dest++;
+    src++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaaaaa iiiiiiii
+    // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+    mov edx, eax;
+    and eax, 0x0000ffff; // 00000000 00000000 aaaaaaaa iiiiiiii
+    mov ebx, eax;        // 00000000 00000000 aaaaaaaa iiiiiiii
+    shl eax, 16;         // aaaaaaaa iiiiiiii 00000000 00000000
+    and ebx, 0x000000ff; // 00000000 00000000 00000000 iiiiiiii
+    or  eax, ebx;        // aaaaaaaa iiiiiiii 00000000 iiiiiiii
+    shl ebx, 8;          // 00000000 00000000 iiiiiiii 00000000
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    mov eax, edx;
+    and eax, 0xffff0000; // aaaaaaaa iiiiiiii 00000000 00000000
+    mov ebx, eax;        // aaaaaaaa iiiiiiii 00000000 00000000
+    and ebx, 0x00ff0000; // 00000000 iiiiiiii 00000000 00000000
+    shr ebx, 8;          // 00000000 00000000 iiiiiiii 00000000
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 iiiiiiii
+    or  eax, ebx;        // aaaaaaaa iiiiiiii iiiiiiii iiiiiiii
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::ARGB8888_ARGB1555(uint32* src, uint32* dest, int width, int height)
+{ /* Packs ARGB8888 texels into ARGB1555: two consecutive src words are combined into each dest word. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* dest words to write (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = ((*src & 0xff000000) ? 0x00008000 : 0x00000000); /* first texel -> bits 15..0; any nonzero alpha sets the 1-bit alpha */
+    *dest |= (((*src & 0x00f80000) >> 9) | /* top 5 bits of red */
+              ((*src & 0x0000f800) >> 6) | /* top 5 bits of green */
+              ((*src & 0x000000f8) >> 3)); /* top 5 bits of blue */
+    src++;
+    *dest |= ((*src & 0xff000000) ? 0x80000000 : 0x00000000); /* second texel -> bits 31..16, packed the same way */
+    *dest |= (((*src & 0x00f80000) << 7) |
+              ((*src & 0x0000f800) << 10) |
+              ((*src & 0x000000f8) << 13));
+    src++;
+    dest++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+#if 1
+    mov edx, eax;
+    and eax, 0xff000000;  // aaaaaaaa 00000000 00000000 00000000
+    jz transparent1;
+    mov eax, 0x00008000;  // 00000000 00000000 a0000000 00000000
+
+  transparent1:
+    mov ebx, edx;
+    and ebx, 0x00f80000;  // 00000000 rrrrr000 00000000 00000000
+    shr ebx, 9;           // 00000000 00000000 0rrrrr00 00000000
+    or  eax, ebx;         // 00000000 00000000 arrrrr00 00000000
+    mov ebx, edx;
+    and ebx, 0x0000f800;  // 00000000 00000000 ggggg000 00000000
+    shr ebx, 6;           // 00000000 00000000 000000gg ggg00000
+    or  eax, ebx;         // 00000000 00000000 arrrrrgg ggg00000
+    and edx, 0x000000f8;  // 00000000 00000000 00000000 bbbbb000
+    shr edx, 3;           // 00000000 00000000 00000000 000bbbbb
+    or  edx, eax;         // 00000000 00000000 arrrrrgg gggbbbbb
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    mov ebx, eax;
+    and eax, 0xff000000;  // aaaaaaaa 00000000 00000000 00000000
+    jz transparent2;
+    or  edx, 0x80000000;  // a0000000 00000000 arrrrrgg gggbbbbb
+
+  transparent2:
+    mov eax, ebx;
+    and ebx, 0x00f80000;  // 00000000 rrrrr000 00000000 00000000
+    shl ebx, 7;           // 0rrrrr00 00000000 00000000 00000000
+    or  edx, ebx;         // arrrrr00 00000000 arrrrrgg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x0000f800;  // 00000000 00000000 ggggg000 00000000
+    shl ebx, 10;          // 000000gg ggg00000 00000000 00000000
+    or  edx, ebx;         // arrrrrgg ggg00000 arrrrrgg gggbbbbb
+    and eax, 0x000000f8;  // 00000000 00000000 00000000 bbbbb000
+    shl eax, 13;          // 00000000 000bbbbb 00000000 00000000
+    or  edx, eax;         // arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+#else
+    mov edx, eax;
+    and edx, 0x01000000;  // 0000000a 00000000 00000000 00000000
+    shr edx, 9;           // 00000000 00000000 a0000000 00000000
+    mov ebx, eax;
+    and ebx, 0x00f80000;  // 00000000 rrrrr000 00000000 00000000
+    shr ebx, 9;           // 00000000 00000000 0rrrrr00 00000000
+    or  edx, ebx;         // 00000000 00000000 arrrrr00 00000000
+    mov ebx, eax;
+    and ebx, 0x0000f800;  // 00000000 00000000 ggggg000 00000000
+    shr ebx, 6;           // 00000000 00000000 000000gg ggg00000
+    or  edx, ebx;         // 00000000 00000000 arrrrrgg ggg00000
+    and eax, 0x000000f8;  // 00000000 00000000 00000000 bbbbb000
+    shr eax, 3;           // 00000000 00000000 00000000 000bbbbb
+    or  edx, eax;         // 00000000 00000000 arrrrrgg gggbbbbb
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    mov ebx, eax;
+    and ebx, 0x80000000;  // a0000000 00000000 00000000 00000000
+    or  edx, ebx;         // a0000000 00000000 arrrrrgg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x00f80000;  // 00000000 rrrrr000 00000000 00000000
+    shl ebx, 7;           // 0rrrrr00 00000000 00000000 00000000
+    or  edx, ebx;         // arrrrr00 00000000 arrrrrgg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x0000f800;  // 00000000 00000000 ggggg000 00000000
+    shl ebx, 10;          // 000000gg ggg00000 00000000 00000000
+    or  edx, ebx;         // arrrrrgg ggg00000 arrrrrgg gggbbbbb
+    and eax, 0x000000f8;  // 00000000 00000000 00000000 bbbbb000
+    shl eax, 13;          // 00000000 000bbbbb 00000000 00000000
+    or  edx, eax;         // arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+#endif
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::ARGB8888_ARGB4444(uint32* src, uint32* dest, int width, int height)
+{ /* Packs ARGB8888 texels into ARGB4444: two consecutive src words are combined into each dest word. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* dest words to write (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (((*src & 0xf0000000) >> 16) | /* first texel -> bits 15..0; the top 4 bits of each channel are kept */
+             ((*src & 0x00f00000) >> 12) |
+             ((*src & 0x0000f000) >> 8) |
+             ((*src & 0x000000f0) >> 4));
+    src++;
+    *dest |= ((*src & 0xf0000000) | /* second texel -> bits 31..16, packed the same way */
+              ((*src & 0x00f00000) << 4) |
+              ((*src & 0x0000f000) << 8) |
+              ((*src & 0x000000f0) << 12));
+    src++;
+    dest++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    mov edx, eax;
+    and edx, 0xf0000000;  // aaaa0000 00000000 00000000 00000000
+    shr edx, 16;          // 00000000 00000000 aaaa0000 00000000
+    mov ebx, eax;
+    and ebx, 0x00f00000;  // 00000000 rrrr0000 00000000 00000000
+    shr ebx, 12;          // 00000000 00000000 0000rrrr 00000000
+    or  edx, ebx;         // 00000000 00000000 aaaarrrr 00000000
+    mov ebx, eax;
+    and ebx, 0x0000f000;  // 00000000 00000000 gggg0000 00000000
+    shr ebx, 8;           // 00000000 00000000 00000000 gggg0000
+    or  edx, ebx;         // 00000000 00000000 aaaarrrr gggg0000
+    and eax, 0x000000f0;  // 00000000 00000000 00000000 bbbb0000
+    shr eax, 4;           // 00000000 00000000 00000000 0000bbbb
+    or  edx, eax;         // 00000000 00000000 aaaarrrr ggggbbbb
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    mov ebx, eax;
+    and ebx, 0xf0000000;  // aaaa0000 00000000 00000000 00000000
+    or  edx, ebx;         // aaaa0000 00000000 aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x00f00000;  // 00000000 rrrr0000 00000000 00000000
+    shl ebx, 4;           // 0000rrrr 00000000 00000000 00000000
+    or  edx, ebx;         // aaaarrrr 00000000 aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x0000f000;  // 00000000 00000000 gggg0000 00000000
+    shl ebx, 8;           // 00000000 gggg0000 00000000 00000000
+    or  edx, ebx;         // aaaarrrr gggg0000 aaaarrrr ggggbbbb
+    and eax, 0x000000f0;  // 00000000 00000000 00000000 bbbb0000
+    shl eax, 12;          // 00000000 0000bbbb 00000000 00000000
+    or  edx, eax;         // aaaarrrr ggggbbbb aaaarrrr ggggbbbb
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::ARGB8888_RGB565(uint32* src, uint32* dest, int width, int height)
+{ /* Packs ARGB8888 texels into RGB565, discarding alpha: two consecutive src words are combined into each dest word. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 1; /* dest words to write (two texels each); an odd leftover texel is not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (((*src & 0x000000f8) >> 3) | /* first texel -> bits 15..0: top 5 bits of blue */
+             ((*src & 0x0000fc00) >> 5) | /* top 6 bits of green */
+             ((*src & 0x00f80000) >> 8)); /* top 5 bits of red */
+    src++;
+    *dest |= (((*src & 0x000000f8) << 13) | /* second texel -> bits 31..16, packed the same way */
+              ((*src & 0x0000fc00) << 11) |
+              ((*src & 0x00f80000) << 8));
+    src++;
+    dest++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    mov edx, eax;
+    and edx, 0x000000F8;  // 00000000 00000000 00000000 bbbbb000
+    shr edx, 3;           // 00000000 00000000 00000000 000bbbbb
+    mov ebx, eax;
+    and ebx, 0x0000FC00;  // 00000000 00000000 gggggg00 00000000
+    shr ebx, 5;           // 00000000 00000000 00000ggg ggg00000
+    or  edx, ebx;         // 00000000 00000000 00000ggg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x00F80000;  // 00000000 rrrrr000 00000000 00000000
+    shr ebx, 8;           // 00000000 00000000 rrrrr000 00000000
+    or  edx, ebx;         // 00000000 00000000 rrrrrggg gggbbbbb
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    mov ebx, eax;
+    and ebx, 0x000000F8;  // 00000000 00000000 00000000 bbbbb000
+    shl ebx, 13;          // 00000000 000bbbbb 00000000 00000000
+    or  edx, ebx;         // 00000000 000bbbbb rrrrrggg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x0000FC00;  // 00000000 00000000 gggggg00 00000000
+    shl ebx, 11;          // 00000ggg ggg00000 00000000 00000000
+    or  edx, ebx;         // 00000ggg gggbbbbb rrrrrggg gggbbbbb
+    mov ebx, eax;
+    and ebx, 0x00F80000;  // 00000000 rrrrr000 00000000 00000000
+    shl ebx, 8;           // rrrrr000 00000000 00000000 00000000
+    or  edx, ebx;         // rrrrrggg gggbbbbb rrrrrggg gggbbbbb
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::ARGB8888_A8(uint32* src, uint32* dest, int width, int height)
+{ /* Packs ARGB8888 texels into 8-bit texels, taking bits 15..8 (the green byte) of each: four src words per dest word. */
+#if 1 /* C implementation; this is the branch that gets compiled */
+  int siz = (width * height) >> 2; /* dest words to write (four texels each); up to three leftover texels are not converted */
+  int i;
+  for (i = 0; i < siz; i++) {
+    *dest = (*src & 0x0000ff00) >> 8; /* texel 0 -> bits 7..0 */
+    src++;
+    *dest |= (*src & 0x0000ff00); /* texel 1 -> bits 15..8 (already in place) */
+    src++;
+    *dest |= ((*src & 0x0000ff00) << 8); /* texel 2 -> bits 23..16 */
+    src++;
+    *dest |= ((*src & 0x0000ff00) << 16); /* texel 3 -> bits 31..24 */
+    src++;
+    dest++;
+  }
+#else /* alternative x86 __asm implementation; excluded while the #if above is 1 */
+  int siz = (width * height) >> 2;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+#if 0
+    mov edx, eax;         // we'll use A comp for every pixel
+    and edx, 0xFF000000;  // aaaaaaaa 00000000 00000000 00000000
+    shr edx, 24;          // 00000000 00000000 00000000 aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xFF000000;  // aaaaaaaa 00000000 00000000 00000000
+    shr eax, 16;          // 00000000 00000000 aaaaaaaa 00000000
+    or  edx, eax;         // 00000000 00000000 aaaaaaaa aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xFF000000;  // aaaaaaaa 00000000 00000000 00000000
+    shr eax, 8;           // 00000000 aaaaaaaa 00000000 00000000
+    or  edx, eax;         // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xFF000000;  // aaaaaaaa 00000000 00000000 00000000
+    or  edx, eax;         // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
+#endif
+
+#if 1
+    mov edx, eax;         // we'll use G comp for every pixel
+    and edx, 0x0000FF00;  // 00000000 00000000 aaaaaaaa 00000000
+    shr edx, 8;           // 00000000 00000000 00000000 aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0x0000FF00;  // 00000000 00000000 aaaaaaaa 00000000
+    or  edx, eax;         // 00000000 00000000 aaaaaaaa aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0x0000FF00;  // 00000000 00000000 aaaaaaaa 00000000
+    shl eax, 8;           // 00000000 aaaaaaaa 00000000 00000000
+    or  edx, eax;         // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0x0000FF00;  // 00000000 00000000 aaaaaaaa 00000000
+    shl eax, 16;          // aaaaaaaa 00000000 00000000 00000000
+    or  edx, eax;         // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
+#endif
+
+#if 0
+    mov edx, eax;
+    and edx, 0x000000FF;  // 00000000 00000000 00000000 aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0x0000FF00;  // 00000000 00000000 aaaaaaaa 00000000
+    or  edx, eax;         // 00000000 00000000 aaaaaaaa aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0x00FF0000;  // 00000000 aaaaaaaa 00000000 00000000
+    or  edx, eax;         // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xFF000000;  // aaaaaaaa 00000000 00000000 00000000
+    or  edx, eax;         // aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
+#endif
+    mov dword ptr [edi], edx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+void
+TxQuantize::ARGB8888_AI44(uint32* src, uint32* dest, int width, int height)
+{
+  /* Packs four ARGB8888 texels into one 32-bit word of four AI44 bytes
+   * (aaaaiiii), first texel in the lowest byte. Alpha is the top nibble
+   * of the source alpha byte; intensity is the top nibble of the source
+   * G component.
+   *
+   * Only (width * height) / 4 whole groups are processed.
+   */
+  const int words = (width * height) >> 2;
+
+  for (int n = 0; n < words; n++, dest++) {
+    uint32 px = *src++;
+    *dest  = ((px & 0xf0000000) >> 24) | ((px & 0x0000f000) >> 12);
+
+    px = *src++;
+    *dest |= ((px & 0xf0000000) >> 16) | ((px & 0x0000f000) >> 4);
+
+    px = *src++;
+    *dest |= ((px & 0xf0000000) >> 8) | ((px & 0x0000f000) << 4);
+
+    px = *src++;
+    *dest |= (px & 0xf0000000) | ((px & 0x0000f000) << 12);
+  }
+}
+
+void
+TxQuantize::ARGB8888_AI88(uint32* src, uint32* dest, int width, int height)
+{
+  /* Packs two ARGB8888 texels into one 32-bit word of two AI88 values
+   * (aaaaaaaa iiiiiiii), first texel in the low 16 bits. Alpha is the
+   * source alpha byte; intensity is the source G component.
+   *
+   * Only (width * height) / 2 whole pairs are processed.
+   */
+  const int words = (width * height) >> 1;
+
+  for (int n = 0; n < words; n++, dest++) {
+    const uint32 first = *src++;
+    *dest = ((first & 0xff000000) >> 16) | ((first & 0x0000ff00) >> 8);
+
+    const uint32 second = *src++;
+    *dest |= (second & 0xff000000) | ((second & 0x0000ff00) << 8);
+  }
+}
+
+/* R.W. Floyd and L. Steinberg, An adaptive algorithm
+ * for spatial grey scale, Proceedings of the Society
+ * of Information Display 17, pp75-77, 1976
+ */
+/* Converts a width x height ARGB8888 image to RGB565 with Floyd-Steinberg
+ * error diffusion.
+ *
+ * src    - source texels, one uint32 per texel (alpha is ignored).
+ * dst    - destination buffer; written as one 16-bit value per texel.
+ * width  - texels per row.
+ * height - number of rows.
+ *
+ * All arithmetic is integer fixed point with a scale of 10000, so an 8-bit
+ * component ranges over 0..2550000. errR/errG/errB hold, per column, the
+ * error carried down to the next row.
+ */
+void
+TxQuantize::ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* Floyd-Steinberg error-diffusion halftoning */
+
+  int i, x, y;
+  int qr, qg, qb; /* quantized incoming values */
+  int ir, ig, ib; /* incoming values */
+  int t;
+  int *errR = new int[width];
+  int *errG = new int[width];
+  int *errB = new int[width];
+
+  uint16 *dest = (uint16 *)dst;
+
+  for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* incoming pixel values */
+      ir = ((*src >> 16) & 0xFF) * 10000;
+      ig = ((*src >>  8) & 0xFF) * 10000;
+      ib = ((*src      ) & 0xFF) * 10000;
+
+      /* no error is carried across from the end of the previous row */
+      if (x == 0) qr = qg = qb = 0;
+
+      /* quantize pixel values.
+       * qr * 0.4375 is the error from the pixel to the left (its EAST
+       * share from the previous iteration),
+       * errR is the error from the pixels above (top, top left, top right) */
+      ir += errR[x] + qr * 4375 / 10000;
+      ig += errG[x] + qg * 4375 / 10000;
+      ib += errB[x] + qb * 4375 / 10000;
+
+      /* SOUTH-EAST share of the previous pixel. It cannot be stored in
+       * the previous iteration because errR[x] still held the value
+       * consumed by the quantization above. */
+      errR[x] = qr * 625 / 10000;
+      errG[x] = qg * 625 / 10000;
+      errB[x] = qb * 625 / 10000;
+
+      qr = ir;
+      qg = ig;
+      qb = ib;
+
+      /* clamp */
+      if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
+      if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
+      if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
+
+      /* convert to RGB565 */
+      qr = qr * 0x1F / 2550000;
+      qg = qg * 0x3F / 2550000;
+      qb = qb * 0x1F / 2550000;
+
+      /* this is the dithered pixel */
+      t  = (qr << 11) | (qg << 5) | qb;
+
+      /* compute the errors: expand the quantized value back to 8 bits
+       * and subtract it from the (error-adjusted) incoming value */
+      qr = ((qr << 3) | (qr >> 2)) * 10000;
+      qg = ((qg << 2) | (qg >> 4)) * 10000;
+      qb = ((qb << 3) | (qb >> 2)) * 10000;
+      qr = ir - qr;
+      qg = ig - qg;
+      qb = ib - qb;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: every pixel except the first in a row has a
+       * neighbour at x - 1, so column 0 must receive this share too. */
+      if (x > 0) {
+        errR[x - 1] += qr * 1875 / 10000;
+        errG[x - 1] += qg * 1875 / 10000;
+        errB[x - 1] += qb * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errR[x] += qr * 3125 / 10000;
+      errG[x] += qg * 3125 / 10000;
+      errB[x] += qb * 3125 / 10000;
+
+      *dest = (t & 0xFFFF);
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errR;
+  delete [] errG;
+  delete [] errB;
+}
+
+
+/* Converts a width x height ARGB8888 image to ARGB1555 with Floyd-Steinberg
+ * error diffusion on the colour components.
+ *
+ * src    - source texels, one uint32 per texel.
+ * dst    - destination buffer; written as one 16-bit value per texel.
+ * width  - texels per row.
+ * height - number of rows.
+ *
+ * The 1-bit alpha is not dithered: it is set whenever the source alpha
+ * byte is non-zero. Arithmetic is integer fixed point with a scale of
+ * 10000 (an 8-bit component ranges over 0..2550000).
+ */
+void
+TxQuantize::ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* Floyd-Steinberg error-diffusion halftoning */
+
+  int i, x, y;
+  int qr, qg, qb; /* quantized incoming values */
+  int ir, ig, ib; /* incoming values */
+  int t;
+  int *errR = new int[width];
+  int *errG = new int[width];
+  int *errB = new int[width];
+
+  uint16 *dest = (uint16 *)dst;
+
+  for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = 0;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* incoming pixel values */
+      ir = ((*src >> 16) & 0xFF) * 10000;
+      ig = ((*src >>  8) & 0xFF) * 10000;
+      ib = ((*src      ) & 0xFF) * 10000;
+
+      /* no error is carried across from the end of the previous row */
+      if (x == 0) qr = qg = qb = 0;
+
+      /* quantize pixel values.
+       * qr * 0.4375 is the error from the pixel to the left (its EAST
+       * share from the previous iteration),
+       * errR is the error from the pixels above (top, top left, top right) */
+      ir += errR[x] + qr * 4375 / 10000;
+      ig += errG[x] + qg * 4375 / 10000;
+      ib += errB[x] + qb * 4375 / 10000;
+
+      /* SOUTH-EAST share of the previous pixel. It cannot be stored in
+       * the previous iteration because errR[x] still held the value
+       * consumed by the quantization above. */
+      errR[x] = qr * 625 / 10000;
+      errG[x] = qg * 625 / 10000;
+      errB[x] = qb * 625 / 10000;
+
+      qr = ir;
+      qg = ig;
+      qb = ib;
+
+      /* clamp */
+      if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
+      if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
+      if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
+
+      /* convert to RGB555 */
+      qr = qr * 0x1F / 2550000;
+      qg = qg * 0x1F / 2550000;
+      qb = qb * 0x1F / 2550000;
+
+      /* this is the dithered pixel; alpha bit is set for any non-zero
+       * source alpha */
+      t  = (qr << 10) | (qg << 5) | qb;
+      t |= ((*src >> 24) ? 0x8000 : 0);
+
+      /* compute the errors: expand the quantized value back to 8 bits
+       * and subtract it from the (error-adjusted) incoming value */
+      qr = ((qr << 3) | (qr >> 2)) * 10000;
+      qg = ((qg << 3) | (qg >> 2)) * 10000;
+      qb = ((qb << 3) | (qb >> 2)) * 10000;
+      qr = ir - qr;
+      qg = ig - qg;
+      qb = ib - qb;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: every pixel except the first in a row has a
+       * neighbour at x - 1, so column 0 must receive this share too. */
+      if (x > 0) {
+        errR[x - 1] += qr * 1875 / 10000;
+        errG[x - 1] += qg * 1875 / 10000;
+        errB[x - 1] += qb * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errR[x] += qr * 3125 / 10000;
+      errG[x] += qg * 3125 / 10000;
+      errB[x] += qb * 3125 / 10000;
+
+      *dest = (t & 0xFFFF);
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errR;
+  delete [] errG;
+  delete [] errB;
+}
+
+/* Converts a width x height ARGB8888 image to ARGB4444 with Floyd-Steinberg
+ * error diffusion.
+ *
+ * src    - source texels, one uint32 per texel.
+ * dst    - destination buffer; written as one 16-bit value per texel.
+ * width  - texels per row.
+ * height - number of rows.
+ *
+ * With ditherAlpha disabled (the hard-coded setting) the alpha nibble is
+ * the top four bits of the source alpha, copied without dithering.
+ * Arithmetic is integer fixed point with a scale of 10000 (an 8-bit
+ * component ranges over 0..2550000).
+ */
+void
+TxQuantize::ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* Floyd-Steinberg error-diffusion halftoning */
+
+  /* NOTE: alpha dithering looks better for alpha gradients, but is prone
+   * to producing noisy speckles for constant or step level alpha. Output
+   * results should always be checked.
+   */
+  boolean ditherAlpha = 0;
+
+  int i, x, y;
+  int qr, qg, qb, qa; /* quantized incoming values */
+  int ir, ig, ib, ia; /* incoming values */
+  int t;
+  int *errR = new int[width];
+  int *errG = new int[width];
+  int *errB = new int[width];
+  int *errA = new int[width];
+
+  uint16 *dest = (uint16 *)dst;
+
+  for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = errA[i] = 0;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* incoming pixel values */
+      ir = ((*src >> 16) & 0xFF) * 10000;
+      ig = ((*src >>  8) & 0xFF) * 10000;
+      ib = ((*src      ) & 0xFF) * 10000;
+      ia = ((*src >> 24) & 0xFF) * 10000;
+
+      /* no error is carried across from the end of the previous row */
+      if (x == 0) qr = qg = qb = qa = 0;
+
+      /* quantize pixel values.
+       * qr * 0.4375 is the error from the pixel to the left (its EAST
+       * share from the previous iteration),
+       * errR is the error from the pixels above (top, top left, top right) */
+      ir += errR[x] + qr * 4375 / 10000;
+      ig += errG[x] + qg * 4375 / 10000;
+      ib += errB[x] + qb * 4375 / 10000;
+      ia += errA[x] + qa * 4375 / 10000;
+
+      /* SOUTH-EAST share of the previous pixel. It cannot be stored in
+       * the previous iteration because errR[x] still held the value
+       * consumed by the quantization above. */
+      errR[x] = qr * 625 / 10000;
+      errG[x] = qg * 625 / 10000;
+      errB[x] = qb * 625 / 10000;
+      errA[x] = qa * 625 / 10000;
+
+      qr = ir;
+      qg = ig;
+      qb = ib;
+      qa = ia;
+
+      /* clamp */
+      if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
+      if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
+      if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
+      if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
+
+      /* convert to RGB444 */
+      qr = qr * 0xF / 2550000;
+      qg = qg * 0xF / 2550000;
+      qb = qb * 0xF / 2550000;
+      qa = qa * 0xF / 2550000;
+
+      /* this is the value to be returned */
+      if (ditherAlpha) {
+        t = (qa << 12) | (qr <<  8) | (qg << 4) | qb;
+      } else {
+        /* undithered alpha: top nibble of the source alpha byte */
+        t = (qr <<  8) | (qg << 4) | qb;
+        t |= (*src >> 16) & 0xF000;
+      }
+
+      /* compute the errors: expand the quantized value back to 8 bits
+       * and subtract it from the (error-adjusted) incoming value */
+      qr = ((qr << 4) | qr) * 10000;
+      qg = ((qg << 4) | qg) * 10000;
+      qb = ((qb << 4) | qb) * 10000;
+      qa = ((qa << 4) | qa) * 10000;
+      qr = ir - qr;
+      qg = ig - qg;
+      qb = ib - qb;
+      qa = ia - qa;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: every pixel except the first in a row has a
+       * neighbour at x - 1, so column 0 must receive this share too. */
+      if (x > 0) {
+        errR[x - 1] += qr * 1875 / 10000;
+        errG[x - 1] += qg * 1875 / 10000;
+        errB[x - 1] += qb * 1875 / 10000;
+        errA[x - 1] += qa * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errR[x] += qr * 3125 / 10000;
+      errG[x] += qg * 3125 / 10000;
+      errB[x] += qb * 3125 / 10000;
+      errA[x] += qa * 3125 / 10000;
+
+      *dest = (t & 0xFFFF);
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errR;
+  delete [] errG;
+  delete [] errB;
+  delete [] errA;
+}
+
+/* Converts a width x height ARGB8888 image to AI44 (aaaaiiii, one byte per
+ * texel) with Floyd-Steinberg error diffusion on the intensity.
+ *
+ * src    - source texels, one uint32 per texel.
+ * dst    - destination buffer; written as one byte per texel.
+ * width  - texels per row.
+ * height - number of rows.
+ *
+ * Intensity is R * 0.299 + G * 0.587 + B * 0.114. With ditherAlpha
+ * disabled (the hard-coded setting) the alpha nibble is the top four bits
+ * of the source alpha, copied without dithering. Arithmetic is integer
+ * fixed point with a scale of 10000 (an 8-bit value ranges over
+ * 0..2550000).
+ */
+void
+TxQuantize::ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* Floyd-Steinberg error-diffusion halftoning */
+
+  /* NOTE: alpha dithering looks better for alpha gradients, but is prone
+   * to producing noisy speckles for constant or step level alpha. Output
+   * results should always be checked.
+   */
+  boolean ditherAlpha = 0;
+
+  int i, x, y;
+  int qi, qa; /* quantized incoming values */
+  int ii, ia; /* incoming values */
+  int t;
+  int *errI = new int[width];
+  int *errA = new int[width];
+
+  uint8 *dest = (uint8 *)dst;
+
+  for (i = 0; i < width; i++) errI[i] = errA[i] = 0;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114 */
+      ii = ((*src >> 16) & 0xFF) * 2990 +
+           ((*src >>  8) & 0xFF) * 5870 +
+           ((*src      ) & 0xFF) * 1140;
+      ia = ((*src >> 24) & 0xFF) * 10000;
+
+      /* no error is carried across from the end of the previous row */
+      if (x == 0) qi = qa = 0;
+
+      /* quantize pixel values.
+       * qi * 0.4375 is the error from the pixel to the left (its EAST
+       * share from the previous iteration),
+       * errI is the error from the pixels above (top, top left, top right) */
+      ii += errI[x] + qi * 4375 / 10000;
+      ia += errA[x] + qa * 4375 / 10000;
+
+      /* SOUTH-EAST share of the previous pixel. It cannot be stored in
+       * the previous iteration because errI[x] still held the value
+       * consumed by the quantization above. */
+      errI[x] = qi * 625 / 10000;
+      errA[x] = qa * 625 / 10000;
+
+      qi = ii;
+      qa = ia;
+
+      /* clamp */
+      if (qi < 0) qi = 0; else if (qi > 2550000) qi = 2550000;
+      if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
+
+      /* convert to I4 */
+      qi = qi * 0xF / 2550000;
+      qa = qa * 0xF / 2550000;
+
+      /* this is the value to be returned */
+      if (ditherAlpha) {
+        t = (qa << 4) | qi;
+      } else {
+        /* undithered alpha: top nibble of the source alpha byte */
+        t = qi;
+        t |= ((*src >> 24) & 0xF0);
+      }
+
+      /* compute the errors: expand the quantized value back to 8 bits
+       * and subtract it from the (error-adjusted) incoming value */
+      qi = ((qi << 4) | qi) * 10000;
+      qa = ((qa << 4) | qa) * 10000;
+      qi = ii - qi;
+      qa = ia - qa;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: every pixel except the first in a row has a
+       * neighbour at x - 1, so column 0 must receive this share too. */
+      if (x > 0) {
+        errI[x - 1] += qi * 1875 / 10000;
+        errA[x - 1] += qa * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errI[x] += qi * 3125 / 10000;
+      errA[x] += qa * 3125 / 10000;
+
+      *dest = t & 0xFF;
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errI;
+  delete [] errA;
+}
+
+void
+TxQuantize::ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height)
+{
+  /* Converts each ARGB8888 texel to a 16-bit AI88 value: source alpha in
+   * the high byte, computed luminance in the low byte.
+   *
+   * Luminance uses the libpng style grayscale conversion, i.e. the
+   * equation from Poynton's ColorFAQ
+   *
+   *     Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
+   *
+   * approximated with integers as
+   *
+   *     Y = (6969 * R + 23434 * G + 2365 * B) / 32768
+   *
+   * The calculation is meant for a linear colorspace. (The 3dfx / NTSC
+   * weights 0.299, 0.587, 0.114 were the disabled alternative.)
+   */
+  uint16 *out = (uint16 *)dst;
+
+  for (int row = 0; row < height; row++) {
+    for (int col = 0; col < width; col++, src++, out++) {
+      const uint32 texel = *src;
+      const uint32 luma = (((texel >> 16) & 0xFF) * 6969 +
+                           ((texel >>  8) & 0xFF) * 23434 +
+                           ((texel      ) & 0xFF) * 2365) / 32768;
+      *out = (((int)luma & 0xFF) | (uint16)((texel >> 16) & 0xFF00));
+    }
+  }
+}
+
+void
+TxQuantize::ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height)
+{
+  /* Converts each ARGB8888 texel to one 8-bit intensity byte using the
+   * libpng style weights: I = (6969 * R + 23434 * G + 2365 * B) / 32768.
+   * Source alpha is ignored. (The 3dfx / NTSC weights 0.299, 0.587,
+   * 0.114 were the disabled alternative.)
+   */
+  uint8 *out = (uint8 *)dst;
+
+  for (int row = 0; row < height; row++) {
+    for (int col = 0; col < width; col++, src++, out++) {
+      const uint32 texel = *src;
+      const uint32 luma = (((texel >> 16) & 0xFF) * 6969 +
+                           ((texel >>  8) & 0xFF) * 23434 +
+                           ((texel      ) & 0xFF) * 2365) / 32768;
+      *out = (int)luma & 0xFF;
+    }
+  }
+}
+
+void
+TxQuantize::P8_16BPP(uint32* src, uint32* dest, int width, int height, uint32* palette)
+{
+  /* Expands width * height 8-bit palette indices into 16-bit texels.
+   *
+   * The passed in palette is RGBA5551 format, read here as an array of
+   * 16-bit entries. Each looked-up entry is rotated right by one bit, so
+   * its lowest bit ends up as the top bit of the output value.
+   */
+  const uint8  *indices = (const uint8 *)src;
+  const uint16 *lut     = (const uint16 *)palette;
+  uint16       *out     = (uint16 *)dest;
+  const int     count   = width * height;
+
+  for (int n = 0; n < count; n++) {
+    const uint16 entry = lut[(int)indices[n]];
+    out[n] = ((entry << 15) | (entry >> 1));
+  }
+}
+
+/* Converts a texture between ARGB8888 and one of the narrower Glide
+ * texture formats.
+ *
+ * src           - source texels in srcformat.
+ * dest          - destination buffer, filled in destformat.
+ * width, height - texture dimensions in texels.
+ * srcformat     - GR_TEXFMT_* of src.
+ * destformat    - GR_TEXFMT_* wanted in dest.
+ * fastQuantizer - only used when converting from ARGB8888: non-zero picks
+ *                 the simple converters, zero picks the error-diffusion /
+ *                 "slow" converters.
+ *
+ * Exactly one of srcformat / destformat must be GR_TEXFMT_ARGB_8888 (the
+ * destination is checked first).
+ *
+ * Returns 1 when a converter was run, 0 when the format pair is not
+ * supported.
+ *
+ * The multi-threaded path is compiled out (guarded by "tofix"), so both
+ * directions force numcore to 1 and always run the converter once on the
+ * calling thread. bpp_shift is only consumed by the disabled threaded
+ * code, where it scales the per-thread buffer strides.
+ */
+boolean
+TxQuantize::quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer)
+{
+  typedef void (TxQuantize::*quantizerFunc)(uint32* src, uint32* dest, int width, int height);
+  quantizerFunc quantizer;
+  int bpp_shift = 0;
+
+  if (destformat == GR_TEXFMT_ARGB_8888) {
+    /* narrow format -> ARGB8888 */
+    switch (srcformat) {
+    case GR_TEXFMT_ARGB_1555:
+      quantizer = &TxQuantize::ARGB1555_ARGB8888;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ARGB_4444:
+      quantizer = &TxQuantize::ARGB4444_ARGB8888;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_RGB_565:
+      quantizer = &TxQuantize::RGB565_ARGB8888;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ALPHA_8:
+      quantizer = &TxQuantize::A8_ARGB8888;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_44:
+      quantizer = &TxQuantize::AI44_ARGB8888;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_88:
+      quantizer = &TxQuantize::AI88_ARGB8888;
+      bpp_shift = 1;
+      break;
+    default:
+      return 0;
+    }
+
+    unsigned int numcore = _numcore;
+    unsigned int blkrow = 0;
+    while (numcore > 1 && blkrow == 0) {
+      blkrow = (height >> 2) / numcore;
+      numcore--;
+    }
+    /* threading is disabled until the "tofix" code below is restored */
+    numcore = 1;
+    if (blkrow > 0 && numcore > 1) {
+_asm int 3
+#ifdef tofix
+      boost::thread *thrd[MAX_NUMCORE];
+      unsigned int i;
+      int blkheight = blkrow << 2;
+      unsigned int srcStride = (width * blkheight) << (2 - bpp_shift);
+      unsigned int destStride = srcStride << bpp_shift;
+      for (i = 0; i < numcore - 1; i++) {
+        thrd[i] = new boost::thread(boost::bind(quantizer,
+                                                this,
+                                                (uint32*)src,
+                                                (uint32*)dest,
+                                                width,
+                                                blkheight));
+        src  += srcStride;
+        dest += destStride;
+      }
+      thrd[i] = new boost::thread(boost::bind(quantizer,
+                                              this,
+                                              (uint32*)src,
+                                              (uint32*)dest,
+                                              width,
+                                              height - blkheight * i));
+      for (i = 0; i < numcore; i++) {
+        thrd[i]->join();
+        delete thrd[i];
+      }
+#endif
+    } else {
+      (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
+    }
+
+  } else if (srcformat == GR_TEXFMT_ARGB_8888) {
+    /* ARGB8888 -> narrow format */
+    switch (destformat) {
+    case GR_TEXFMT_ARGB_1555:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB1555 : &TxQuantize::ARGB8888_ARGB1555_ErrD;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ARGB_4444:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB4444 : &TxQuantize::ARGB8888_ARGB4444_ErrD;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_RGB_565:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_RGB565 : &TxQuantize::ARGB8888_RGB565_ErrD;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ALPHA_8:
+    case GR_TEXFMT_INTENSITY_8:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_A8 : &TxQuantize::ARGB8888_I8_Slow;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_44:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI44 : &TxQuantize::ARGB8888_AI44_ErrD;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_88:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI88 : &TxQuantize::ARGB8888_AI88_Slow;
+      bpp_shift = 1;
+      break;
+    default:
+      return 0;
+    }
+
+    unsigned int numcore = _numcore;
+    unsigned int blkrow = 0;
+    while (numcore > 1 && blkrow == 0) {
+      blkrow = (height >> 2) / numcore;
+      numcore--;
+    }
+    /* threading is disabled until the "tofix" code below is restored.
+     * Without this override a multi-core setting would reach the
+     * breakpoint, convert nothing, and still report success. */
+    numcore = 1;
+    if (blkrow > 0 && numcore > 1) {
+_asm int 3
+#ifdef tofix
+      boost::thread *thrd[MAX_NUMCORE];
+      unsigned int i;
+      int blkheight = blkrow << 2;
+      unsigned int srcStride = (width * blkheight) << 2;
+      unsigned int destStride = srcStride >> bpp_shift;
+      for (i = 0; i < numcore - 1; i++) {
+        thrd[i] = new boost::thread(boost::bind(quantizer,
+                                                this,
+                                                (uint32*)src,
+                                                (uint32*)dest,
+                                                width,
+                                                blkheight));
+        src  += srcStride;
+        dest += destStride;
+      }
+      thrd[i] = new boost::thread(boost::bind(quantizer,
+                                              this,
+                                              (uint32*)src,
+                                              (uint32*)dest,
+                                              width,
+                                              height - blkheight * i));
+      for (i = 0; i < numcore; i++) {
+        thrd[i]->join();
+        delete thrd[i];
+      }
+#endif
+    } else {
+      (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
+    }
+
+  } else {
+    return 0;
+  }
+
+  return 1;
+}
+
+/* Compresses an ARGB8888 texture to FXT1 through the _tx_compress_fxt1
+ * function pointer.
+ *
+ * src        - source texels; must be ARGB8888.
+ * dest       - destination buffer for the compressed data.
+ * srcwidth   - source width in texels; must be at least 8.
+ * srcheight  - source height in texels; must be at least 4.
+ * srcformat  - closest 16bpp representation of src (not used here).
+ * destwidth  - out: srcwidth rounded up to a multiple of 8.
+ * destheight - out: srcheight rounded up to a multiple of 4.
+ * destformat - out: GR_TEXFMT_ARGB_CMP_FXT1.
+ *
+ * Returns 1 when the texture was compressed; 0 when no compressor is
+ * available or the texture is too small, in which case the out
+ * parameters are left untouched.
+ *
+ * The multi-threaded path is compiled out (guarded by "tofix"), so
+ * numcore is forced to 1 and the compressor always runs once on the
+ * calling thread.
+ */
+boolean
+TxQuantize::FXT1(uint8 *src, uint8 *dest,
+             int srcwidth, int srcheight, uint16 srcformat,
+             int *destwidth, int *destheight, uint16 *destformat)
+{
+  /*
+   * NOTE: src must be in ARGB8888 format, srcformat describes
+   * the closest 16bpp representation of src.
+   *
+   * NOTE: I have modified the dxtn library to use ARGB format
+   * which originally was ABGR format.
+   */
+
+  boolean bRet = 0;
+
+  if (_tx_compress_fxt1 &&
+      srcwidth >= 8 && srcheight >= 4) {
+    /* compress to fxt1
+     * width and height must be at least 8 and 4 respectively
+     */
+    int dstRowStride = ((srcwidth + 7) & ~7) << 1;
+    int srcRowStride = (srcwidth << 2);
+
+    unsigned int numcore = _numcore;
+    unsigned int blkrow = 0;
+    while (numcore > 1 && blkrow == 0) {
+      blkrow = (srcheight >> 2) / numcore;
+      numcore--;
+    }
+    /* threading is disabled until the "tofix" code below is restored.
+     * Without this override a multi-core setting would reach the
+     * breakpoint, compress nothing, and still report success. */
+    numcore = 1;
+    if (blkrow > 0 && numcore > 1) {
+_asm int 3
+#ifdef tofix
+      boost::thread *thrd[MAX_NUMCORE];
+      unsigned int i;
+      int blkheight = blkrow << 2;
+      unsigned int srcStride = (srcwidth * blkheight) << 2;
+      unsigned int destStride = dstRowStride * blkrow;
+      for (i = 0; i < numcore - 1; i++) {
+        thrd[i] = new boost::thread(boost::bind(_tx_compress_fxt1,
+                                                srcwidth,
+                                                blkheight,
+                                                4,
+                                                src,
+                                                srcRowStride,
+                                                dest,
+                                                dstRowStride));
+        src  += srcStride;
+        dest += destStride;
+      }
+      thrd[i] = new boost::thread(boost::bind(_tx_compress_fxt1,
+                                              srcwidth,
+                                              srcheight - blkheight * i,
+                                              4,
+                                              src,
+                                              srcRowStride,
+                                              dest,
+                                              dstRowStride));
+      for (i = 0; i < numcore; i++) {
+        thrd[i]->join();
+        delete thrd[i];
+      }
+#endif
+    } else {
+      (*_tx_compress_fxt1)(srcwidth,      /* width */
+                           srcheight,     /* height */
+                           4,             /* comps: ARGB8888=4, RGB888=3 */
+                           src,           /* source */
+                           srcRowStride,  /* width*comps */
+                           dest,          /* destination */
+                           dstRowStride); /* 16 bytes per 8x4 texel */
+    }
+
+    /* dxtn adjusts width and height to M8 and M4 respectively by replication */
+    *destwidth  = (srcwidth  + 7) & ~7;
+    *destheight = (srcheight + 3) & ~3;
+    *destformat = GR_TEXFMT_ARGB_CMP_FXT1;
+
+    bRet = 1;
+  }
+
+  return bRet;
+}
+
+boolean
+TxQuantize::DXTn(uint8 *src, uint8 *dest,
+             int srcwidth, int srcheight, uint16 srcformat,
+             int *destwidth, int *destheight, uint16 *destformat)
+{
+  /*
+   * Compress an ARGB8888 image to S3TC (DXT1 or DXT5).
+   *
+   * src         - source texels; must be in ARGB8888 format
+   * dest        - receives the compressed blocks; this function does not
+   *               allocate it
+   * srcwidth,
+   * srcheight   - source dimensions in texels, each at least 4
+   * srcformat   - the closest 16bpp representation of src; it selects
+   *               between DXT1 and DXT5
+   * destwidth,
+   * destheight,
+   * destformat  - outputs; only written when compression is performed
+   *
+   * Returns 1 when the image was compressed. Returns 0 when no DXTn
+   * compressor is loaded, the image is smaller than 4x4, or srcformat
+   * is one that DXTn would not make any smaller.
+   *
+   * NOTE: I have modified the dxtn library to use ARGB format
+   * which originally was ABGR format.
+   */
+
+  /* a compressor must be loaded and the image must hold at least one
+   * full 4x4 block
+   */
+  if (!_tx_compress_dxtn || srcwidth < 4 || srcheight < 4)
+    return 0;
+
+  /* skip formats that DXTn won't help in size. */
+  if (srcformat == GR_TEXFMT_ALPHA_8 ||
+      srcformat == GR_TEXFMT_ALPHA_INTENSITY_44)
+    return 0;
+
+  /* default to DXT5: 16 bytes per 4x4 block, i.e. 4 bytes per texel
+   * column of a block row
+   */
+  int dstRowStride = ((srcwidth + 3) & ~3) << 2;
+  int compression = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
+
+  *destformat = GR_TEXFMT_ARGB_CMP_DXT5;
+
+#if !GLIDE64_DXTN
+  /* okay... we are going to disable DXT1 with 1bit alpha
+   * for Glide64. some textures have all 0 alpha values.
+   * see "N64 Kobe Bryant in NBA Courtside"
+   */
+  if (srcformat == GR_TEXFMT_ARGB_1555) {
+    dstRowStride >>= 1;
+    compression = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
+    *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
+  } else
+#endif
+  if (srcformat == GR_TEXFMT_RGB_565 ||
+      srcformat == GR_TEXFMT_INTENSITY_8) {
+    /* opaque sources fit DXT1: 8 bytes per 4x4 block */
+    dstRowStride >>= 1;
+    compression = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
+    *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
+  }
+
+#ifdef tofix
+  /* Multi-core path. It depends on boost::thread and is only built when
+   * "tofix" is defined. Without it the whole image is always compressed
+   * by the single call below; this replaces the former debug trap
+   * (int 3) that fired here and left dest unwritten while still
+   * reporting success.
+   */
+  unsigned int numcore = _numcore;
+  unsigned int blkrow = 0;
+  while (numcore > 1 && blkrow == 0) {
+    blkrow = (srcheight >> 2) / numcore;
+    numcore--;
+  }
+  if (blkrow > 0 && numcore > 1) {
+    boost::thread *thrd[MAX_NUMCORE];
+    unsigned int i;
+    int blkheight = blkrow << 2;
+    unsigned int srcStride = (srcwidth * blkheight) << 2;
+    unsigned int destStride = dstRowStride * blkrow;
+    /* every thread but the last takes blkheight rows */
+    for (i = 0; i < numcore - 1; i++) {
+      thrd[i] = new boost::thread(boost::bind(_tx_compress_dxtn,
+                                              4,
+                                              srcwidth,
+                                              blkheight,
+                                              src,
+                                              compression,
+                                              dest,
+                                              dstRowStride));
+      src  += srcStride;
+      dest += destStride;
+    }
+    /* the last thread takes whatever rows remain */
+    thrd[i] = new boost::thread(boost::bind(_tx_compress_dxtn,
+                                            4,
+                                            srcwidth,
+                                            srcheight - blkheight * i,
+                                            src,
+                                            compression,
+                                            dest,
+                                            dstRowStride));
+    for (i = 0; i < numcore; i++) {
+      thrd[i]->join();
+      delete thrd[i];
+    }
+  } else
+#endif
+  {
+    (*_tx_compress_dxtn)(4,             /* comps: ARGB8888=4, RGB888=3 */
+                         srcwidth,      /* width */
+                         srcheight,     /* height */
+                         src,           /* source */
+                         compression,   /* format */
+                         dest,          /* destination */
+                         dstRowStride); /* DXT1 = 8 bytes per 4x4 texel
+                                         * others = 16 bytes per 4x4 texel */
+  }
+
+  /* dxtn adjusts width and height to M4 by replication */
+  *destwidth  = (srcwidth  + 3) & ~3;
+  *destheight = (srcheight + 3) & ~3;
+
+  return 1;
+}
+
+boolean
+TxQuantize::compress(uint8 *src, uint8 *dest,
+                    int srcwidth, int srcheight, uint16 srcformat,
+                    int *destwidth, int *destheight, uint16 *destformat,
+                    int compressionType)
+{
+  /* Hand the image to the compressor selected by compressionType and
+   * pass its result straight back. The arguments are forwarded to the
+   * chosen compressor unchanged.
+   */
+  if (compressionType == FXT1_COMPRESSION)
+    return FXT1(src, dest,
+                srcwidth, srcheight, srcformat,
+                destwidth, destheight, destformat);
+
+  if (compressionType == S3TC_COMPRESSION)
+    return DXTn(src, dest,
+                srcwidth, srcheight, srcformat,
+                destwidth, destheight, destformat);
+
+  /* NCC_COMPRESSION: TODO: narrow channel compression.
+   * It and any other value compress nothing and report failure.
+   */
+  return 0;
+}
+
+#if 0 /* unused */
+/* Expand 8-bit texels to 32-bit texels (compiled out by the enclosing #if 0).
+ *
+ * Each source byte v becomes the 32-bit value 0xFFvvvvvv: the byte is
+ * copied into the three low bytes and the top byte is forced to 0xff.
+ *
+ * One loop iteration reads a 32-bit source word (four texels) and stores
+ * four 32-bit destination words, so the loop runs (width * height) / 4
+ * times.
+ *
+ * NOTE(review): the counter is only tested after the body has run
+ * (dec ecx / jnz), so a texel count below 4 (siz == 0) is not handled.
+ * Written as MSVC-style 32-bit x86 inline assembly.
+ */
+void
+TxQuantize::I8_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = (width * height) >> 2;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaaaaa
+    // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+    // texel 0: lowest byte of the source word
+    mov edx, eax;
+    and eax, 0x000000ff;
+    mov ebx, eax;        // 00000000 00000000 00000000 aaaaaaaa
+    shl ebx, 8;          // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // 00000000 00000000 aaaaaaaa aaaaaaaa
+    shl ebx, 8;         // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    // texel 1: second byte of the source word
+    mov eax, edx;
+    and eax, 0x0000ff00;
+    mov ebx, eax;        // 00000000 00000000 aaaaaaaa 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 aaaaaaaa
+    or  eax, ebx;        // 00000000 00000000 aaaaaaaa aaaaaaaa
+    shl ebx, 16;         // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    // texel 2: third byte of the source word
+    mov eax, edx;
+    and eax, 0x00ff0000;
+    mov ebx, eax;        // 00000000 aaaaaaaa 00000000 00000000
+    shr ebx, 8;          // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa 00000000
+    shr ebx, 8;         // 00000000 00000000 00000000 aaaaaaaa
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    // texel 3: highest byte of the source word
+    mov eax, edx;
+    and eax, 0xff000000;
+    mov ebx, eax;        // aaaaaaaa 00000000 00000000 00000000
+    shr ebx, 8;          // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // aaaaaaaa aaaaaaaa 00000000 00000000
+    shr ebx, 8;         // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // aaaaaaaa aaaaaaaa aaaaaaaa 00000000
+    shr eax, 8;         // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+
+/* Forwards to ARGB8888_A8() with the same arguments; no work is done
+ * here (compiled out by the enclosing #if 0).
+ */
+void
+TxQuantize::ARGB8888_I8(uint32* src, uint32* dest, int width, int height)
+{
+  ARGB8888_A8(src, dest, width, height);
+}
+
+/* Expand 16-bit ARGB1555 texels to 32-bit texels laid out as
+ * alpha, blue, green, red from the top byte down (compiled out by the
+ * enclosing #if 0).
+ *
+ * The single alpha bit becomes 0xff when set and 0x00 when clear. Each
+ * 5-bit colour channel is widened to 8 bits by placing it in the top
+ * five bits and repeating its top three bits in the low three bits.
+ *
+ * One loop iteration reads a 32-bit source word (two texels, low half
+ * first) and stores two 32-bit destination words, so the loop runs
+ * (width * height) / 2 times.
+ *
+ * NOTE(review): the counter is only tested after the body has run
+ * (dec ecx / jnz), so siz == 0 is not handled.
+ * Written as MSVC-style 32-bit x86 inline assembly.
+ */
+void
+TxQuantize::ARGB1555_ABGR8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // arrr rrgg gggb bbbb
+    // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+    // first texel: low 16 bits of the source word
+    mov edx, eax;         // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+    and ebx, 0x00000000;  // ebx = 0, the output texel is built up in ebx
+    and eax, 0x00008000;  // eax = 00000000 00000000 a0000000 00000000
+    jz  transparent1;     // alpha bit clear: leave the alpha byte at 0
+    or  ebx, 0xff000000;  // ebx = aaaaaaaa 00000000 00000000 00000000
+
+  transparent1:
+    mov eax, edx;         // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+    and edx, 0x0000001f;  // edx = 00000000 00000000 00000000 000bbbbb
+    shl edx, 14;          // edx = 00000000 00000bbb bb000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa 00000bbb bb000000 00000000
+    shl edx, 5;           // edx = 00000000 bbbbb000 00000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
+    and ebx, 0xffff0000;  // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
+    mov edx, eax;
+    and edx, 0x000003e0;  // edx = 00000000 00000000 000000gg ggg00000
+    shl edx, 1;           // edx = 00000000 00000000 00000ggg gg000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
+    shl edx, 5;           // edx = 00000000 00000000 ggggg000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
+    and ebx, 0xffffff00;  // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
+    mov edx, eax;
+    and edx, 0x00007c00;  // edx = 00000000 00000000 0rrrrr00 00000000
+    shr edx, 7;           // edx = 00000000 00000000 00000000 rrrrr000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
+    shr edx, 5;           // edx = 00000000 00000000 00000000 00000rrr
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], ebx;
+    add edi, 4;
+
+    // second texel: high 16 bits of the source word
+    shr eax, 16;          // eax = 00000000 00000000 arrrrrgg gggbbbbb
+    mov edx, eax;         // edx = 00000000 00000000 arrrrrgg gggbbbbb
+    and ebx, 0x00000000;  // ebx = 0, start the next output texel
+    and eax, 0x00008000;  // eax = 00000000 00000000 a0000000 00000000
+    jz  transparent2;     // alpha bit clear: leave the alpha byte at 0
+    or  ebx, 0xff000000;  // ebx = aaaaaaaa 00000000 00000000 00000000
+
+  transparent2:
+    mov eax, edx;         // eax = 00000000 00000000 arrrrrgg gggbbbbb
+    and edx, 0x0000001f;  // edx = 00000000 00000000 00000000 000bbbbb
+    shl edx, 14;          // edx = 00000000 00000bbb bb000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa 00000bbb bb000000 00000000
+    shl edx, 5;           // edx = 00000000 bbbbb000 00000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
+    and ebx, 0xffff0000;  // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
+    mov edx, eax;
+    and edx, 0x000003e0;  // edx = 00000000 00000000 000000gg ggg00000
+    shl edx, 1;           // edx = 00000000 00000000 00000ggg gg000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
+    shl edx, 5;           // edx = 00000000 00000000 ggggg000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
+    and ebx, 0xffffff00;  // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
+    mov edx, eax;
+    and edx, 0x00007c00;  // edx = 00000000 00000000 0rrrrr00 00000000
+    shr edx, 7;           // edx = 00000000 00000000 00000000 rrrrr000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
+    shr edx, 5;           // edx = 00000000 00000000 00000000 00000rrr
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], ebx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+
+/* Expand 16-bit ARGB4444 texels to 32-bit texels laid out as
+ * alpha, blue, green, red from the top byte down (compiled out by the
+ * enclosing #if 0).
+ *
+ * Each 4-bit channel is first moved into the low nibble of its output
+ * byte; the final "shl 4 / or" then copies it into the high nibble as
+ * well, so nibble n becomes byte 0xnn.
+ *
+ * One loop iteration reads a 32-bit source word (two texels, low half
+ * first) and stores two 32-bit destination words, so the loop runs
+ * (width * height) / 2 times.
+ *
+ * NOTE(review): the counter is only tested after the body has run
+ * (dec ecx / jnz), so siz == 0 is not handled.
+ * Written as MSVC-style 32-bit x86 inline assembly.
+ */
+void
+TxQuantize::ARGB4444_ABGR8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaa rrrr gggg bbbb
+    // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+    // first texel: low 16 bits of the source word, built up in eax
+    mov edx, eax;        // keep the whole word; the high half is used below
+    and eax, 0x0000ffff;
+    mov ebx, eax;        // 00000000 00000000 aaaarrrr ggggbbbb
+    and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    shl ebx, 12;         // 0000aaaa 00000000 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 00000000 aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
+    shl ebx, 16;         // 00000000 0000bbbb 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 0000rrrr
+    and eax, 0xfffffff0;
+    or  eax, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
+    mov ebx, eax;
+    and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
+    shl ebx, 4;          // 00000000 00000000 0000gggg 00000000
+    and eax, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
+    or  eax, ebx;        // 0000aaaa 0000bbbb 0000gggg 0000rrrr
+    mov ebx, eax;
+    shl ebx, 4;          // aaaa0000 bbbb0000 gggg0000 rrrr0000
+    or  eax, ebx;        // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], eax;
+
+    add edi, 4;
+
+    // second texel: high 16 bits of the source word, built up in edx
+    shr edx, 16;
+    mov ebx, edx;        // 00000000 00000000 aaaarrrr ggggbbbb
+    and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    shl ebx, 12;         // 0000aaaa 00000000 00000000 00000000
+    or  edx, ebx;        // 0000aaaa 00000000 aaaarrrr ggggbbbb
+    mov ebx, edx;
+    and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
+    shl ebx, 16;         // 00000000 0000bbbb 00000000 00000000
+    or  edx, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
+    mov ebx, edx;
+    and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 0000rrrr
+    and edx, 0xfffffff0;
+    or  edx, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
+    mov ebx, edx;
+    and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
+    shl ebx, 4;          // 00000000 00000000 0000gggg 00000000
+    and edx, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
+    or  edx, ebx;        // 0000aaaa 0000bbbb 0000gggg 0000rrrr
+    mov ebx, edx;
+    shl ebx, 4;          // aaaa0000 bbbb0000 gggg0000 rrrr0000
+    or  edx, ebx;        // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+
+/* Swap the red and blue bytes of 32-bit texels, keeping alpha and green
+ * in place (compiled out by the enclosing #if 0).
+ *
+ * One loop iteration converts one texel, so the loop runs
+ * width * height times.
+ *
+ * NOTE(review): the counter is only tested after the body has run
+ * (dec ecx / jnz), so siz == 0 is not handled.
+ * Written as MSVC-style 32-bit x86 inline assembly.
+ */
+void
+TxQuantize::ARGB8888_ABGR8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = width * height;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+    mov edx, eax;
+    bswap edx;            // reverse the byte order: b g r a
+    shr edx, 8;           // drop the alpha byte: 0 b g r
+    and eax, 0xff000000;  // keep only the original alpha byte
+
+    or eax, edx;          // a b g r
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+#endif
diff --git a/Source/GlideHQ/TxQuantize.h b/Source/GlideHQ/TxQuantize.h
new file mode 100644
index 000000000..d3c6ae6dc
--- /dev/null
+++ b/Source/GlideHQ/TxQuantize.h
@@ -0,0 +1,99 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXQUANTIZE_H__
+#define __TXQUANTIZE_H__
+
+/* Glide64 DXTn workaround
+ * (0:disable, 1:enable) */
+#define GLIDE64_DXTN 1
+
+#include "TxInternal.h"
+#include "TxUtil.h"
+
+/* Texel format conversion and texture compression front end.
+ *
+ * Only declarations live here. The private helpers are named
+ * SRC_DST after the formats they read and write; their exact texel
+ * layouts are defined by the implementations, not by this header.
+ */
+class TxQuantize
+{
+private:
+  TxUtil *_txUtil;
+  int _numcore; /* read by DXTn() when dividing compression work between threads */
+
+  /* compressor entry points */
+  fxtCompressTexFuncExt _tx_compress_fxt1; /* presumably called by FXT1() - TODO confirm */
+  dxtCompressTexFuncExt _tx_compress_dxtn; /* called by DXTn(); DXTn() fails when it is null */
+
+  /* fast optimized... well, sort of. */
+  /* 16bpp and 8bpp formats to ARGB8888 */
+  void ARGB1555_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void ARGB4444_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void RGB565_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void A8_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void AI44_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void AI88_ARGB8888(uint32* src, uint32* dst, int width, int height);
+
+  /* ARGB8888 to 16bpp and 8bpp formats */
+  void ARGB8888_ARGB1555(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ARGB4444(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_RGB565(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_A8(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI44(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI88(uint32* src, uint32* dst, int width, int height);
+
+  /* quality */
+  /* NOTE(review): "ErrD" presumably stands for error diffusion - confirm
+   * against the implementations */
+  void ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height);
+
+  /* compressors */
+  /* Both take ARGB8888 texels in src plus the closest 16bpp format in
+   * srcformat, and return nonzero when dest was filled and the three
+   * dest* outputs were written. */
+  boolean FXT1(uint8 *src, uint8 *dest,
+               int srcwidth, int srcheight, uint16 srcformat,
+               int *destwidth, int *destheight, uint16 *destformat);
+  boolean DXTn(uint8 *src, uint8 *dest,
+               int srcwidth, int srcheight, uint16 srcformat,
+               int *destwidth, int *destheight, uint16 *destformat);
+
+public:
+  TxQuantize();
+  ~TxQuantize();
+
+  /* others */
+  void P8_16BPP(uint32* src, uint32* dst, int width, int height, uint32* palette);
+
+  /* fastQuantizer defaults to 1; presumably it chooses between the fast
+   * and the "quality" converters above - TODO confirm */
+  boolean quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer = 1);
+
+  /* Dispatch on compressionType: FXT1_COMPRESSION goes to FXT1(),
+   * S3TC_COMPRESSION goes to DXTn(); any other value returns 0. */
+  boolean compress(uint8 *src, uint8 *dest,
+                   int srcwidth, int srcheight, uint16 srcformat,
+                   int *destwidth, int *destheight, uint16 *destformat,
+                   int compressionType);
+
+
+#if 0 /* unused */
+  /* x86 inline-assembly converters, kept for reference only */
+  void ARGB8888_I8(uint32* src, uint32* dst, int width, int height);
+  void I8_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  
+  void ARGB1555_ABGR8888(uint32* src, uint32* dst, int width, int height);
+  void ARGB4444_ABGR8888(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ABGR8888(uint32* src, uint32* dst, int width, int height);
+#endif
+};
+
+#endif /* __TXQUANTIZE_H__ */
diff --git a/Source/GlideHQ/TxReSample.cpp b/Source/GlideHQ/TxReSample.cpp
new file mode 100644
index 000000000..138428b77
--- /dev/null
+++ b/Source/GlideHQ/TxReSample.cpp
@@ -0,0 +1,417 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "TxReSample.h"
+#include "TxDbg.h"
+#include <stdlib.h>
+#include <memory.h>
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+int
+TxReSample::nextPow2(int num)
+{
+  /* Round num up to a power of two; a value that already is a power of
+   * two is returned unchanged.
+   *
+   * After subtracting one, the highest set bit is smeared into every
+   * lower position (shifts of 1, 2, 4, 8 and 16 cover a 32-bit int; a
+   * 64-bit int would need one more shift of 32), and adding one back
+   * carries into the next power of two.
+   */
+  int bits = num - 1;
+
+  for (int shift = 1; shift <= 16; shift <<= 1)
+    bits |= bits >> shift;
+
+  return bits + 1;
+}
+
+boolean
+TxReSample::nextPow2(uint8** image, int* width, int* height, int bpp, boolean use_3dfx = 0)
+{
+  /* Resize *image in place to power-of-two dimensions.
+   *
+   * image    - in/out; replaced by a newly malloc'ed buffer (the old one
+   *            is freed) when the dimensions change
+   * width,
+   * height   - in/out; updated to the new dimensions when they change
+   * bpp      - bits per pixel; must be one of the following: 8, 16, 24, 32
+   * use_3dfx - nonzero limits the W:H aspect ratio to 8:1 - 1:8
+   *
+   * Returns 1 when the image already had the target size or was resized,
+   * 0 on a null/zero argument or when the new buffer cannot be allocated.
+   * Nothing is modified when 0 is returned.
+   */
+  if (!*image || !*width || !*height || !bpp)
+    return 0;
+
+  const int srcW = *width;
+  const int srcH = *height;
+  const int srcPitch = (srcW * bpp) >> 3;
+
+  int dstW = srcW;
+  int dstH = srcH;
+
+  /* HACKALERT: (n) is explicitly subtracted from width/height to
+   * adjust textures that are (n) pixels larger than a power of 2 size.
+   * This is a dirty hack for textures whose aspect ratio was munged by
+   * (n) pixels relative to the original.
+   */
+  if      (dstW > 64) dstW -= 4;
+  else if (dstW > 16) dstW -= 2;
+  else if (dstW >  4) dstW -= 1;
+
+  if      (dstH > 64) dstH -= 4;
+  else if (dstH > 16) dstH -= 2;
+  else if (dstH >  4) dstH -= 1;
+
+  dstW = nextPow2(dstW);
+  dstH = nextPow2(dstH);
+
+  /* 3dfx Glide3 format, W:H aspect ratio range (8:1 - 1:8) */
+  if (use_3dfx) {
+    if (dstW > dstH) {
+      if (dstW > (dstH << 3))
+        dstH = dstW >> 3;
+    } else if (dstH > (dstW << 3)) {
+      dstW = dstH >> 3;
+    }
+    DBG_INFO(80, L"using 3dfx W:H aspect ratio range (8:1 - 1:8).\n");
+  }
+
+  /* the final width is settled, so the destination pitch can be fixed */
+  const int dstPitch = (dstW * bpp) >> 3;
+
+  /* nothing to do when the image already has the target size */
+  if (srcW == dstW && srcH == dstH)
+    return 1;
+
+  DBG_INFO(80, L"expand image to next power of 2 dimensions. %d x %d -> %d x %d\n",
+           srcW, srcH, dstW, dstH);
+
+  /* the hack above can make the target smaller than the source; only
+   * the part that fits is copied
+   */
+  const int copyW = (srcW > dstW) ? dstW : srcW;
+  const int copyH = (srcH > dstH) ? dstH : srcH;
+  const int copyBytes = (copyW * bpp) >> 3;
+  const int texelBytes = bpp >> 3;
+
+  uint8 *expanded = (uint8*)malloc(dstPitch * dstH);
+  if (!expanded)
+    return 0;
+
+  const uint8 *srcRow = *image;
+  uint8 *dstRow = expanded;
+
+  for (int row = 0; row < copyH; row++) {
+    memcpy(dstRow, srcRow, copyBytes);
+
+    /* pad the row to the right by repeating its last texel */
+    for (int b = copyBytes; b < dstPitch; b++)
+      dstRow[b] = dstRow[b - texelBytes];
+
+    srcRow += srcPitch;
+    dstRow += dstPitch;
+  }
+
+  /* pad downwards by repeating the row above; dstRow already points at
+   * the first missing row
+   */
+  for (int row = copyH; row < dstH; row++) {
+    memcpy(dstRow, dstRow - dstPitch, dstPitch);
+    dstRow += dstPitch;
+  }
+
+  free(*image);
+
+  *image = expanded;
+  *height = dstH;
+  *width = dstW;
+
+  return 1;
+}
+
+/* Ken Turkowski
+ * Filters for Common Resampling Tasks
+ * Apple Computer 1990
+ */
+double
+TxReSample::tent(double x)
+{
+  /* triangle kernel: 1 - |x| for |x| below 1, zero everywhere else */
+  const double ax = (x < 0.0) ? -x : x;
+
+  return (ax < 1.0) ? (1.0 - ax) : 0.0;
+}
+
+double
+TxReSample::gaussian(double x)
+{
+  /* base-2 gaussian kernel: 2^(-2 x^2) for |x| below 2, zero elsewhere */
+  const double ax = (x < 0) ? -x : x;
+
+  if (!(ax < 2.0))
+    return 0.0;
+
+  return pow(2.0, -2.0 * ax * ax);
+}
+
+double
+TxReSample::sinc(double x)
+{
+  /* normalized sinc: sin(pi x) / (pi x), with the limit value 1 at x = 0 */
+  if (x == 0)
+    return 1.0;
+
+  const double px = x * M_PI;
+
+  return (sin(px) / px);
+}
+
+double
+TxReSample::lanczos3(double x)
+{
+  /* sinc windowed by a three times wider sinc; zero once |x| reaches 3 */
+  const double ax = (x < 0) ? -x : x;
+
+  if (!(ax < 3.0))
+    return 0.0;
+
+  return (sinc(ax) * sinc(ax/3.0));
+}
+
+/* Don P. Mitchell and Arun N. Netravali
+ * Reconstruction Filters in Computer Graphics
+ * SIGGRAPH '88
+ * Proceedings of the 15th annual conference on Computer 
+ * graphics and interactive techniques, pp221-228, 1988
+ */
+double
+TxReSample::mitchell(double x)
+{
+  /* Piecewise cubic kernel with B = C = 1/3: one polynomial for
+   * |x| below 1, a second for |x| in [1, 2), zero from 2 onwards.
+   * Both polynomials share the final division by 6.
+   */
+  const double B = 1.0 / 3.0;
+  const double C = 1.0 / 3.0;
+  const double ax = (x < 0) ? -x : x;
+  double poly;
+
+  if (!(ax < 2.0))
+    return 0.0;
+
+  if (ax < 1.0) {
+    /* inner lobe */
+    poly = (((12.0 - 9.0 * B - 6.0 * C) * (ax * ax * ax))
+            + ((-18.0 + 12.0 * B + 6.0 * C) * (ax * ax))
+            + (6.0 - 2.0 * B));
+  } else {
+    /* outer lobe */
+    poly = (((-1.0 * B - 6.0 * C) * (ax * ax * ax))
+            + ((6.0 * B + 30.0 * C) * (ax * ax))
+            + ((-12.0 * B - 48.0 * C) * ax)
+            + (8.0 * B + 24.0 * C));
+  }
+
+  return (poly / 6.0);
+}
+
+/* J. F. Kaiser and W. A. Reed
+ * Data smoothing using low-pass digital filters
+ * Rev. Sci. instrum. 48 (11), pp1447-1457, 1977
+ */
+double
+TxReSample::besselI0(double x)
+{
+  /* Zero-order modified bessel function of the first kind, summed as a
+   * power series: term k is ((x/2)^k / k!)^2. Summation stops once the
+   * newest term no longer exceeds eps_coeff times the running sum.
+   */
+  const double eps_coeff = 1E-16; /* small enough */
+  const double half_x = 0.5 * x;
+  double total = 1.0;  /* term 0 */
+  double factor = 1.0; /* (x/2)^k / k!, updated incrementally */
+  double term = 1.0;
+
+  for (int k = 1; term > total * eps_coeff; k++) {
+    factor *= (half_x / k);
+    term = factor * factor;
+    total = total + term;
+  }
+
+  return total;
+}
+
+double
+TxReSample::kaiser(double x)
+{
+  /* sinc shaped by a Kaiser-Bessel window with alpha 4 and a half width
+   * of 5. The square root needs |x| <= 5 to stay real.
+   */
+  const double alpha = 4.0;
+  const double half_window = 5.0;
+  const double t = x / half_window;
+
+  /* multiply first, then normalize: same operation order as before */
+  const double shaped = sinc(x) * besselI0(alpha * sqrt(1 - t * t));
+
+  return shaped / besselI0(alpha);
+}
+
+boolean
+TxReSample::minify(uint8 **src, int *width, int *height, int ratio)
+{
+  /* Shrink an ARGB8888 image to 1/ratio of its width and height.
+   *
+   * src    - in/out; on success the old buffer is freed and *src points
+   *          to a newly malloc'ed image
+   * width,
+   * height - in/out; updated to the new dimensions on success
+   * ratio  - integer reduction factor (the inverse of the scale), >= 2
+   *
+   * Returns 1 on success. Returns 0 and leaves *src, *width and *height
+   * untouched when *src is NULL, ratio is below 2, the image is smaller
+   * than ratio in either dimension, or an allocation fails.
+   *
+   * NOTE: src must be ARGB8888, ratio is the inverse representation
+   */
+
+#if 0
+  if (!*src || ratio < 2) return 0;
+
+  /* Box filtering.
+   * It would be nice to do Kaiser filtering.
+   * N64 uses narrow strip textures which makes it hard to filter effectively.
+   */
+
+  int x, y, x2, y2, offset, numtexel;
+  uint32 A, R, G, B, texel;
+
+  int tmpwidth = *width / ratio;
+  int tmpheight = *height / ratio;
+
+  uint8 *tmptex = (uint8*)malloc((tmpwidth * tmpheight) << 2);
+
+  if (tmptex) {
+    numtexel = ratio * ratio;
+    for (y = 0; y < tmpheight; y++) {
+      offset = ratio * y * *width;
+      for (x = 0; x < tmpwidth; x++) {
+        A = R = G = B = 0;
+        for (y2 = 0; y2 < ratio; y2++) {
+          for (x2 = 0; x2 < ratio; x2++) {
+            texel = ((uint32*)*src)[offset + *width * y2 + x2];
+            A += (texel >> 24);
+            R += ((texel >> 16) & 0x000000ff);
+            G += ((texel >> 8) & 0x000000ff);
+            B += (texel & 0x000000ff);
+          }
+        }
+        A = (A + ratio) / numtexel;
+        R = (R + ratio) / numtexel;
+        G = (G + ratio) / numtexel;
+        B = (B + ratio) / numtexel;
+        ((uint32*)tmptex)[y * tmpwidth + x] = ((A << 24) | (R << 16) | (G << 8) | B);
+        offset += ratio;
+      }
+    }
+    free(*src);
+    *src = tmptex;
+    *width = tmpwidth;
+    *height = tmpheight;
+
+    DBG_INFO(80, L"minification ratio:%d -> %d x %d\n", ratio, *width, *height);
+
+    return 1;
+  }
+
+  DBG_INFO(80, L"Error: failed minification!\n");
+
+  return 0;
+
+#else
+
+  if (!*src || ratio < 2) return 0;
+
+  /* Image Resampling */
+
+  /* half width of filter window.
+   * NOTE: must be 1.0 or larger.
+   *
+   * kaiser-bessel 5, lanczos3 3, mitchell 2, gaussian 1.5, tent 1
+   */
+  double half_window = 5.0;
+
+  int x, y, x2, y2, z;
+  double A, R, G, B;
+  uint32 texel;
+
+  int tmpwidth = *width / ratio;
+  int tmpheight = *height / ratio;
+
+  /* an image smaller than ratio would shrink to nothing; refuse it
+   * instead of handing back an empty texture
+   */
+  if (tmpwidth < 1 || tmpheight < 1) return 0;
+
+  /* number of filter taps on one side of the centre, rounded up so the
+   * table covers every index the loops below touch even when
+   * half_window * ratio is not a whole number
+   */
+  int taps = (int)(half_window * ratio);
+  if (taps < half_window * ratio) taps++;
+
+  /* resampled destination */
+  uint8 *tmptex = (uint8*)malloc((tmpwidth * tmpheight) << 2);
+  if (!tmptex) return 0;
+
+  /* work buffer. single row */
+  uint8 *workbuf = (uint8*)malloc(*width << 2);
+  if (!workbuf) {
+    free(tmptex);
+    return 0;
+  }
+
+  /* prepare filter lookup table. only half width required for symmetric filters. */
+  double *weight = (double*)malloc(taps * sizeof(double));
+  if (!weight) {
+    free(tmptex);
+    free(workbuf);
+    return 0;
+  }
+  for (x = 0; x < taps; x++) {
+    //weight[x] = tent((double)x / ratio) / ratio;
+    //weight[x] = gaussian((double)x / ratio) / ratio;
+    //weight[x] = lanczos3((double)x / ratio) / ratio;
+    //weight[x] = mitchell((double)x / ratio) / ratio;
+    weight[x] = kaiser((double)x / ratio) / ratio;
+  }
+
+  /* linear convolution: for each output row, filter vertically into the
+   * work buffer at full width, then filter that row horizontally
+   */
+  for (y = 0; y < tmpheight; y++) {
+    /* vertical pass, centred on source row y * ratio */
+    for (x = 0; x < *width; x++) {
+      texel = ((uint32*)*src)[y * ratio * *width + x];
+      A = (double)(texel >> 24) * weight[0];
+      R = (double)((texel >> 16) & 0xff) * weight[0];
+      G = (double)((texel >>  8) & 0xff) * weight[0];
+      B = (double)((texel      ) & 0xff) * weight[0];
+      for (y2 = 1; y2 < taps; y2++) {
+        /* row below the centre, clamped to the last row */
+        z = y * ratio + y2;
+        if (z >= *height) z = *height - 1;
+        texel = ((uint32*)*src)[z * *width + x];
+        A += (double)(texel >> 24) * weight[y2];
+        R += (double)((texel >> 16) & 0xff) * weight[y2];
+        G += (double)((texel >>  8) & 0xff) * weight[y2];
+        B += (double)((texel      ) & 0xff) * weight[y2];
+        /* row above the centre, clamped to the first row */
+        z = y * ratio - y2;
+        if (z < 0) z = 0;
+        texel = ((uint32*)*src)[z * *width + x];
+        A += (double)(texel >> 24) * weight[y2];
+        R += (double)((texel >> 16) & 0xff) * weight[y2];
+        G += (double)((texel >>  8) & 0xff) * weight[y2];
+        B += (double)((texel      ) & 0xff) * weight[y2];
+      }
+      /* the kernel has negative lobes, so clamp before packing */
+      if (A < 0) A = 0; else if (A > 255) A = 255;
+      if (R < 0) R = 0; else if (R > 255) R = 255;
+      if (G < 0) G = 0; else if (G > 255) G = 255;
+      if (B < 0) B = 0; else if (B > 255) B = 255;
+      ((uint32*)workbuf)[x] = (((uint32)A << 24) | ((uint32)R << 16) | ((uint32)G << 8) | (uint32)B);
+    }
+    /* horizontal pass, centred on work buffer column x * ratio */
+    for (x = 0; x < tmpwidth; x++) {
+      texel = ((uint32*)workbuf)[x * ratio];
+      A = (double)(texel >> 24) * weight[0];
+      R = (double)((texel >> 16) & 0xff) * weight[0];
+      G = (double)((texel >>  8) & 0xff) * weight[0];
+      B = (double)((texel      ) & 0xff) * weight[0];
+      for (x2 = 1; x2 < taps; x2++) {
+        /* column to the right, clamped to the last column */
+        z = x * ratio + x2;
+        if (z >= *width) z = *width - 1;
+        texel = ((uint32*)workbuf)[z];
+        A += (double)(texel >> 24) * weight[x2];
+        R += (double)((texel >> 16) & 0xff) * weight[x2];
+        G += (double)((texel >>  8) & 0xff) * weight[x2];
+        B += (double)((texel      ) & 0xff) * weight[x2];
+        /* column to the left, clamped to the first column */
+        z = x * ratio - x2;
+        if (z < 0) z = 0;
+        texel = ((uint32*)workbuf)[z];
+        A += (double)(texel >> 24) * weight[x2];
+        R += (double)((texel >> 16) & 0xff) * weight[x2];
+        G += (double)((texel >>  8) & 0xff) * weight[x2];
+        B += (double)((texel      ) & 0xff) * weight[x2];
+      }
+      if (A < 0) A = 0; else if (A > 255) A = 255;
+      if (R < 0) R = 0; else if (R > 255) R = 255;
+      if (G < 0) G = 0; else if (G > 255) G = 255;
+      if (B < 0) B = 0; else if (B > 255) B = 255;
+      ((uint32*)tmptex)[y * tmpwidth + x] = (((uint32)A << 24) | ((uint32)R << 16) | ((uint32)G << 8) | (uint32)B);
+    }
+  }
+
+  /* hand the new image to the caller and release everything else */
+  free(*src);
+  *src = tmptex;
+  free(weight);
+  free(workbuf);
+  *width = tmpwidth;
+  *height = tmpheight;
+
+  DBG_INFO(80, L"minification ratio:%d -> %d x %d\n", ratio, *width, *height);
+
+  return 1;
+#endif
+}
diff --git a/Source/GlideHQ/TxReSample.h b/Source/GlideHQ/TxReSample.h
new file mode 100644
index 000000000..805647d29
--- /dev/null
+++ b/Source/GlideHQ/TxReSample.h
@@ -0,0 +1,45 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXRESAMPLE_H__
+#define __TXRESAMPLE_H__
+
+#include "TxInternal.h"
+
+/* Image resizing helpers: integer-ratio minification and expansion to
+ * power-of-two dimensions.
+ */
+class TxReSample
+{
+private:
+  /* reconstruction filter kernels; each maps a distance x from the
+   * sample centre to a weight */
+  double tent(double x);
+  double gaussian(double x);
+  double sinc(double x);
+  double lanczos3(double x);
+  double mitchell(double x);
+  double besselI0(double x); /* helper for kaiser(), not a kernel itself */
+  double kaiser(double x);
+public:
+  /* Shrinks the ARGB8888 image in *src by the integer factor ratio,
+   * replacing *src, *width and *height on success. Returns nonzero on
+   * success. */
+  boolean minify(uint8 **src, int *width, int *height, int ratio);
+  /* Resizes *image to power-of-two dimensions, replacing *image, *width
+   * and *height when they change. The definition in TxReSample.cpp
+   * supplies a default of 0 for use_3dfx. */
+  boolean nextPow2(uint8** image, int* width, int* height, int bpp, boolean use_3dfx);
+  /* Rounds num up to a power of two. */
+  int nextPow2(int num);
+};
+
+#endif /* __TXRESAMPLE_H__ */
diff --git a/Source/GlideHQ/TxTexCache.cpp b/Source/GlideHQ/TxTexCache.cpp
new file mode 100644
index 000000000..68e526402
--- /dev/null
+++ b/Source/GlideHQ/TxTexCache.cpp
@@ -0,0 +1,80 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef WIN32
+#pragma warning(disable: 4786)
+#endif
+
+/* dump cache to disk (0:disable, 1:enable) */
+#define DUMP_CACHE 1
+
+#include "TxTexCache.h"
+#include "TxDbg.h"
+#include <zlib/zlib.h>
+#include <string>
+#include <Common/path.h>
+
+TxTexCache::~TxTexCache()
+{
+#if DUMP_CACHE
+  /* On teardown, write the cache to disk if DUMP_TEXCACHE is still set. */
+  if ((_options & DUMP_TEXCACHE) != 0) {
+    /* only these option bits are handed to save() */
+    const int cacheConfig = _options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESS_TEX|COMPRESSION_MASK|FORCE16BPP_TEX|GZ_TEXCACHE);
+    /* directory: _path with "cache" appended; file: _ident + "_MEMORYCACHE.dat" */
+    CPath cacheDir(stdstr().FromUTF16(_path.c_str()).c_str(),"");
+    cacheDir.AppendDirectory("cache");
+    const std::wstring cacheFile = _ident + L"_MEMORYCACHE.dat";
+    TxCache::save(stdstr(cacheDir).ToUTF16().c_str(), cacheFile.c_str(), cacheConfig);
+  }
+#endif
+}
+
+TxTexCache::TxTexCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+                       dispInfoFuncExt callback
+                       ) : TxCache((options & ~GZ_HIRESTEXCACHE), cachesize, path, ident, callback)
+{
+  /* Dumping needs a path, an identifier and a non-zero cache size; otherwise drop the flag. */
+  const bool canDump = !_path.empty() && !_ident.empty() && (_cacheSize != 0);
+  if (!canDump)
+    _options &= ~DUMP_TEXCACHE;
+
+#if DUMP_CACHE
+  /* Try to restore a previously dumped cache; the result of load() is not checked. */
+  if ((_options & DUMP_TEXCACHE) != 0) {
+    const int cacheConfig = _options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESS_TEX|COMPRESSION_MASK|FORCE16BPP_TEX|GZ_TEXCACHE);
+    CPath cacheDir(stdstr().FromUTF16(_path.c_str()),"");
+    cacheDir.AppendDirectory("cache");
+    const std::wstring cacheFile = _ident + L"_MEMORYCACHE.dat";
+    TxCache::load(stdstr(cacheDir).ToUTF16().c_str(), cacheFile.c_str(), cacheConfig);
+  }
+#endif
+}
+
+boolean
+TxTexCache::add(uint64 checksum, GHQTexInfo *info)
+{
+  /* A cache size of zero or less means nothing is stored: report 0.
+   * Otherwise defer to the base-class implementation. */
+  return (_cacheSize > 0) ? TxCache::add(checksum, info) : 0;
+}
diff --git a/Source/GlideHQ/TxTexCache.h b/Source/GlideHQ/TxTexCache.h
new file mode 100644
index 000000000..b80209533
--- /dev/null
+++ b/Source/GlideHQ/TxTexCache.h
@@ -0,0 +1,39 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXTEXCACHE_H__
+#define __TXTEXCACHE_H__
+
+#include "TxCache.h"
+
+class TxTexCache : public TxCache /* TxCache variant that can dump itself to a "_MEMORYCACHE.dat" file (see TxTexCache.cpp) */
+{
+public:
+  ~TxTexCache(); /* saves the cache to disk when DUMP_TEXCACHE is set and DUMP_CACHE is compiled in */
+  TxTexCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+             dispInfoFuncExt callback); /* GZ_HIRESTEXCACHE is masked out of options; may load a previous dump */
+  boolean add(uint64 checksum, /* checksum: high 32 bits palette, low 32 bits texture */
+              GHQTexInfo *info); /* returns 0 without storing when the cache size is <= 0 */
+};
+
+#endif /* __TXTEXCACHE_H__ */
diff --git a/Source/GlideHQ/TxUtil.cpp b/Source/GlideHQ/TxUtil.cpp
new file mode 100644
index 000000000..4e72cd8da
--- /dev/null
+++ b/Source/GlideHQ/TxUtil.cpp
@@ -0,0 +1,1006 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "TxUtil.h"
+#include "TxDbg.h"
+#include <zlib/zlib.h>
+#include <malloc.h>
+
+/*
+ * External libraries
+ ******************************************************************************/
+TxLoadLib::TxLoadLib()
+{
+#ifdef DXTN_DLL
+  if (!_dxtnlib) /* members are read before being assigned: this relies on the only instance being the function-local static in getInstance(), which is zero-initialized */
+    _dxtnlib = LoadLibrary("dxtn");
+
+  if (_dxtnlib) { /* resolve the two encoder entry points; they stay unset when the library could not be loaded */
+    if (!_tx_compress_dxtn)
+      _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");
+
+    if (!_tx_compress_fxt1)
+      _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
+  }
+#else
+  _tx_compress_dxtn = tx_compress_dxtn; /* encoders linked into the build (prototypes in TxUtil.h) */
+  _tx_compress_fxt1 = fxt1_encode;
+
+#endif
+}
+
+TxLoadLib::~TxLoadLib()
+{
+#ifdef DXTN_DLL
+  /* release the "dxtn" library if the constructor managed to load it */
+  if (_dxtnlib)
+    FreeLibrary(_dxtnlib);
+#endif
+
+}
+
+fxtCompressTexFuncExt
+TxLoadLib::getfxtCompressTexFuncExt()
+{
+  return _tx_compress_fxt1; /* FXT1 encoder entry point; may still be unset (NULL) in DXTN_DLL builds when the library was not loaded */
+}
+
+dxtCompressTexFuncExt
+TxLoadLib::getdxtCompressTexFuncExt()
+{
+  return _tx_compress_dxtn; /* DXTn encoder entry point; may still be unset (NULL) in DXTN_DLL builds when the library was not loaded */
+}
+
+
+/*
+ * Utilities
+ ******************************************************************************/
+uint32
+TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
+{
+  int dataSize = sizeofTx(width, height, format);
+
+  /* Checksum over the whole texture payload. An Adler-32 variant is kept
+   * below for reference; the checksum actually in use is zlib crc32.
+   * Returns 0 when sizeofTx() reports 0 bytes (e.g. unsupported format). */
+  /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */
+
+  /* zlib crc32, seeded with the library's initial value */
+  return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);
+}
+
+int
+TxUtil::sizeofTx(int width, int height, uint16 format)
+{
+  int dataSize = 0; /* size in bytes; stays 0 for formats not listed below */
+
+  /* a lookup table for the shifts would be better */
+  switch (format) {
+  case GR_TEXFMT_ARGB_CMP_FXT1:
+    dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1; /* width rounded up to a multiple of 8, height to a multiple of 4; half a byte per texel */
+    break;
+  case GR_TEXFMT_ARGB_CMP_DXT1:
+    dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1; /* both dimensions rounded up to a multiple of 4; half a byte per texel */
+    break;
+  case GR_TEXFMT_ARGB_CMP_DXT3:
+  case GR_TEXFMT_ARGB_CMP_DXT5:
+    dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3); /* both dimensions rounded up to a multiple of 4; one byte per texel */
+    break;
+  case GR_TEXFMT_ALPHA_INTENSITY_44:
+  case GR_TEXFMT_ALPHA_8:
+  case GR_TEXFMT_INTENSITY_8:
+  case GR_TEXFMT_P_8:
+    dataSize = width * height; /* one byte per texel */
+    break;
+  case GR_TEXFMT_ARGB_4444:
+  case GR_TEXFMT_ARGB_1555:
+  case GR_TEXFMT_RGB_565:
+  case GR_TEXFMT_ALPHA_INTENSITY_88:
+    dataSize = (width * height) << 1; /* two bytes per texel */
+    break;
+  case GR_TEXFMT_ARGB_8888:
+    dataSize = (width * height) << 2; /* four bytes per texel */
+    break;
+  default:
+    /* unsupported format: only logged, the function then returns 0 */
+    DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
+    ;
+  }
+
+  return dataSize;
+}
+
+#if 0 /* unused */
+uint32
+TxUtil::chkAlpha(uint32* src, int width, int height)
+{
+  /* NOTE: src must be ARGB8888
+   * return values
+   * 0x00000000: 8bit alpha (some alpha byte is neither 0x00 nor 0xff)
+   * 0x00000001: 1bit alpha (every alpha byte is 0x00 or 0xff, at least one is 0x00)
+   * 0xff000001: no alpha (every alpha byte is 0xff)
+   */
+
+  int _size = width * height;
+  uint32 alpha = 0;
+
+  __asm {
+    mov esi, dword ptr [src];
+    mov ecx, dword ptr [_size];
+    mov ebx, 0xff000000;
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xff000000;
+    jz  alpha1bit;
+    cmp eax, 0xff000000;
+    je  alpha1bit;
+    jmp done;
+
+  alpha1bit:
+    and ebx, eax;
+    dec ecx;
+    jnz tc1_loop;
+
+    or  ebx, 0x00000001;
+    mov dword ptr [alpha], ebx;
+
+  done:
+  }
+
+  return alpha;
+}
+#endif
+
+uint32
+TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
+{
+  /* Texture checksum, currently the Rice CRC32; Jabo MD5 or another
+   * custom scheme could be substituted. A NULL src yields 0.
+   * TODO: use *_HIRESTEXTURE option. */
+  uint32 crc = 0;
+  if (src != NULL)
+    crc = RiceCRC32(src, width, height, size, rowStride);
+  return crc;
+}
+
+uint64
+TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  /* Rice CRC32 for now. We can switch this to Jabo MD5 or
+   * any other custom checksum.
+   * TODO: use *_HIRESTEXTURE option. */
+  /* Returned value is 64 bits: high 32 = palette crc32, low 32 = texture crc32; 0 when src is NULL */
+
+  if (!src) return 0;
+
+  uint64 crc64Ret = 0;
+
+  if (palette) {
+    uint32 crc32 = 0, cimax = 0; /* note: this local named crc32 hides zlib's crc32() inside this scope */
+    switch (size & 0xff) {
+    case 1: /* handled by the 8-bit colour-index variant */
+      if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) {
+        crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512); /* palette CRC over entries 0..cimax as a single row */
+        crc64Ret <<= 32;
+        crc64Ret |= (uint64)crc32;
+      }
+      break;
+    case 0: /* handled by the 4-bit colour-index variant */
+      if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) {
+        crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32); /* palette CRC over entries 0..cimax as a single row */
+        crc64Ret <<= 32;
+        crc64Ret |= (uint64)crc32;
+      }
+    }
+  }
+  if (!crc64Ret) { /* no palette, another size value, or a combined value of exactly 0: use the texture-only CRC (high half stays 0) */
+    crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride);
+  }
+
+  return crc64Ret;
+}
+
+/*
+** Computes Adler32 checksum for a stream of data.
+**
+** From the specification found in RFC 1950: (ZLIB Compressed Data Format
+** Specification version 3.3)
+**
+** ADLER32 (Adler-32 checksum) This contains a checksum value of the
+** uncompressed data (excluding any dictionary data) computed according to
+** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
+** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
+**
+** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
+** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
+** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
+** as s2*65536 + s1 in most-significant-byte first (network) order.
+**
+** 8.2. The Adler-32 algorithm 
+**
+** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
+** provides an extremely low probability of undetected errors.
+**
+** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
+** so the modulo operation time is negligible. If the bytes are a, b, c,
+** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
+** unlike the first sum, which is just a checksum. That 65521 is prime is
+** important to avoid a possible large class of two-byte errors that leave
+** the check unchanged. (The Fletcher checksum uses 255, which is not prime
+** and which also makes the Fletcher check insensitive to single byte
+** changes 0 <-> 255.)
+**
+** The sum s1 is initialized to 1 instead of zero to make the length of
+** the sequence part of s2, so that the length does not have to be checked
+** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
+*/
+
+uint32
+TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
+{
+#if 1
+  /* zlib adler32: continues the running value dwAdler32 over Len bytes of data */
+  return adler32(dwAdler32, data, Len);
+#else
+  register uint32 s1 = dwAdler32 & 0xFFFF; /* disabled hand-written variant: low half is the byte sum */
+  register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF; /* high half is the sum of the s1 values */
+  int k;
+
+  while (Len > 0) {
+    /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+    k = (Len < 5552 ? Len : 5552);
+    Len -= k;
+    while (k--) {
+      s1 += *data++;
+      s2 += s1;
+    }
+    /* 65521 is the largest prime smaller than 65536 */
+    s1 %= 65521;
+    s2 %= 65521;
+  }
+
+  return (s2 << 16) | s1;
+#endif
+}
+
+uint32
+TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
+{
+  /* Adler-32 over a strided image: each of the height rows contributes
+   * width * size bytes, rows are rowStride bytes apart, and the running
+   * value (seeded with 1) is chained from one row to the next. */
+  const uint32 rowBytes = width * size;
+  uint32 adler = 1;
+
+  for (int row = 0; row < height; ++row, src += rowStride)
+    adler = Adler32(src, rowBytes, adler);
+
+  return adler;
+}
+
+/* Rice CRC32 for hires texture packs */
+/* NOTE: The following is used in Glide64 to calculate the CRC32
+ * for Rice hires texture packs.
+ *
+ * BYTE* addr = (BYTE*)(gfx.RDRAM +
+ *                     rdp.addr[rdp.tiles[tile].t_mem] +
+ *                     (rdp.tiles[tile].ul_t * bpl) +
+ *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
+ * RiceCRC32(addr,
+ *          rdp.tiles[tile].width,
+ *          rdp.tiles[tile].height,
+ *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
+ *          bpl);
+ */
+uint32
+TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
+{
+  /* NOTE: bytes_per_width must be equal or larger than 4: the inner loop loads a 32-bit word at offset bytes_per_width - 4 before it tests the offset */
+  /* For each row (counter height-1 down to 0) and each word offset (bytes_per_width-4 down to 0, step 4): crc = rol(crc, 4) + (word ^ offset); after each row: crc += last (word ^ offset) ^ row counter; src then advances by rowStride */
+  uint32 crc32Ret = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/ /* NOTE(review): guard is disabled in the original; enabling it would change the values returned for narrow rows */
+
+  try { /* NOTE(review): catch(...) handles C++ exceptions; whether a bad memory access in the asm ever reaches it depends on compiler settings - confirm */
+#ifdef WIN32
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src];
+      mov eax, dword ptr [height];
+      mov edx, 0;
+      dec eax;
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4;
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+      xor esi, ebx;
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1;
+
+      xor esi, eax;
+      add edx, esi;
+      add ecx, dword ptr [rowStride];
+      dec eax;
+      jge loop2;
+
+      mov dword ptr [crc32Ret], edx;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+      "pushl %%esi \n"
+      "pushl %%edi \n"
+
+      "movl %0, %%ecx \n"
+      "movl %1, %%eax \n"
+      "movl $0, %%edx \n"
+      "decl %%eax \n"
+
+      "0: \n"
+      "movl %2, %%ebx \n"
+      "subl $4, %%ebx \n"
+
+      "1: \n"
+      "movl (%%ecx,%%ebx), %%esi \n"
+      "xorl %%ebx, %%esi \n"
+      "roll $4, %%edx \n"
+      "addl %%esi, %%edx \n"
+      "subl $4, %%ebx \n"
+      "jge  1b \n"
+
+      "xorl %%eax, %%esi \n"
+      "addl %%esi, %%edx \n"
+      "addl %3, %%ecx \n"
+      "decl %%eax \n"
+      "jge  0b \n"
+
+      "movl %%edx, %4 \n"
+
+      "popl %%edi \n"
+      "popl %%esi \n"
+      "popl %%ebx \n"
+      :
+      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret) /* NOTE(review): crc32Ret is written by the asm but listed as an input, and eax/ecx/edx are modified without being listed as clobbers */
+      : "memory", "cc"
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  return crc32Ret;
+}
+
+boolean
+TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax)
+{
+  /* NOTE: bytes_per_width must be equal or larger than 4: the inner loop loads a 32-bit word at offset bytes_per_width - 4 before it tests the offset */
+
+  uint32 crc32Ret = 0;
+  uint32 cimaxRet = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/ /* NOTE(review): guard is disabled in the original; enabling it would change the values returned for narrow rows */
+
+  /* 4bit CI: same CRC arithmetic as RiceCRC32; in addition edi tracks the largest 4-bit value found in the words that are loaded, and the nibble scan is skipped once it has reached 0xf */
+  try {
+#ifdef WIN32
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src];
+      mov eax, dword ptr [height];
+      mov edx, 0;
+      mov edi, 0;
+      dec eax;
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4;
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+
+      cmp edi, 0x0000000f;
+      je findmax0;
+
+      push ecx;
+      mov ecx, esi;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax8;
+      mov edi, ecx;
+
+    findmax8:
+      mov ecx, esi;
+      shr ecx, 4;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax7;
+      mov edi, ecx;
+
+    findmax7:
+      mov ecx, esi;
+      shr ecx, 8;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax6;
+      mov edi, ecx;
+
+    findmax6:
+      mov ecx, esi;
+      shr ecx, 12;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax5;
+      mov edi, ecx;
+
+    findmax5:
+      mov ecx, esi;
+      shr ecx, 16;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax4;
+      mov edi, ecx;
+
+    findmax4:
+      mov ecx, esi;
+      shr ecx, 20;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax3;
+      mov edi, ecx;
+
+    findmax3:
+      mov ecx, esi;
+      shr ecx, 24;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax2;
+      mov edi, ecx;
+
+    findmax2:
+      mov ecx, esi;
+      shr ecx, 28;
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax1;
+      mov edi, ecx;
+
+    findmax1:
+      pop ecx;
+
+    findmax0:
+      xor esi, ebx;
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1;
+
+      xor esi, eax;
+      add edx, esi;
+      add ecx, dword ptr [rowStride];
+      dec eax;
+      jge loop2;
+
+      mov dword ptr [crc32Ret], edx;
+      mov dword ptr [cimaxRet], edi;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+      "pushl %%esi \n"
+      "pushl %%edi \n"
+
+      "movl %0, %%ecx \n"
+      "movl %1, %%eax \n"
+      "movl $0, %%edx \n"
+      "movl $0, %%edi \n"
+      "decl %%eax \n"
+
+      "0: \n"
+      "movl %2, %%ebx \n"
+      "subl $4, %%ebx \n"
+
+      "1: \n"
+      "movl (%%ecx,%%ebx), %%esi \n"
+
+      "cmpl $0x0000000f, %%edi \n"
+      "je  10f \n"
+
+      "pushl %%ecx \n"
+      "movl %%esi, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   2f \n"
+      "movl %%ecx, %%edi \n"
+
+      "2: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $4, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   3f \n"
+      "movl %%ecx, %%edi \n"
+
+      "3: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $8, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   4f \n"
+      "movl %%ecx, %%edi \n"
+
+      "4: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $12, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   5f \n"
+      "movl %%ecx, %%edi \n"
+
+      "5: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $16, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   6f \n"
+      "movl %%ecx, %%edi \n"
+
+      "6: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $20, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   7f \n"
+      "movl %%ecx, %%edi \n"
+
+      "7: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $24, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   8f \n"
+      "movl %%ecx, %%edi \n"
+
+      "8: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $28, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   9f \n"
+      "movl %%ecx, %%edi \n"
+
+      "9: \n"
+      "popl %%ecx \n"
+
+      "10: \n"
+      "xorl %%ebx, %%esi \n"
+      "roll $4, %%edx \n"
+      "addl %%esi, %%edx \n"
+      "subl $4, %%ebx \n"
+      "jge  1b \n"
+
+      "xorl %%eax, %%esi \n"
+      "addl %%esi, %%edx \n"
+      "addl %3, %%ecx \n"
+      "decl %%eax \n"
+      "jge  0b \n"
+
+      "movl %%edx, %4 \n"
+      "movl %%edi, %5 \n"
+
+      "popl %%edi \n"
+      "popl %%esi \n"
+      "popl %%ebx \n"
+      :
+      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet) /* NOTE(review): crc32Ret and cimaxRet are written by the asm but listed as inputs, and eax/ecx/edx are modified without being listed as clobbers */
+      : "memory", "cc"
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  *crc32 = crc32Ret; /* texture CRC */
+  *cimax = cimaxRet; /* largest 4-bit value seen by the scan */
+
+  return 1; /* always 1, also after the catch block ran */
+}
+
+boolean
+TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
+                      uint32* crc32, uint32* cimax)
+{
+  /* NOTE: bytes_per_width must be equal or larger than 4: the inner loop loads a 32-bit word at offset bytes_per_width - 4 before it tests the offset */
+
+  uint32 crc32Ret = 0;
+  uint32 cimaxRet = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/ /* NOTE(review): guard is disabled in the original; enabling it would change the values returned for narrow rows */
+
+  /* 8bit CI: same CRC arithmetic as RiceCRC32; in addition edi tracks the largest byte value found in the words that are loaded, and the byte scan is skipped once it has reached 0xff */
+  try {
+#ifdef WIN32
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src];
+      mov eax, dword ptr [height];
+      mov edx, 0;
+      mov edi, 0;
+      dec eax;
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4;
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+
+      cmp edi, 0x000000ff;
+      je findmax0;
+
+      push ecx;
+      mov ecx, esi;
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax4;
+      mov edi, ecx;
+
+    findmax4:
+      mov ecx, esi;
+      shr ecx, 8;
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax3;
+      mov edi, ecx;
+
+    findmax3:
+      mov ecx, esi;
+      shr ecx, 16;
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax2;
+      mov edi, ecx;
+
+    findmax2:
+      mov ecx, esi;
+      shr ecx, 24;
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax1;
+      mov edi, ecx;
+
+    findmax1:
+      pop ecx;
+
+    findmax0:
+      xor esi, ebx;
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1;
+
+      xor esi, eax;
+      add edx, esi;
+      add ecx, dword ptr [rowStride];
+      dec eax;
+      jge loop2;
+
+      mov dword ptr [crc32Ret], edx;
+      mov dword ptr [cimaxRet], edi;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+      "pushl %%esi \n"
+      "pushl %%edi \n"
+
+      "movl %0, %%ecx \n"
+      "movl %1, %%eax \n"
+      "movl $0, %%edx \n"
+      "movl $0, %%edi \n"
+      "decl %%eax \n"
+
+      "0: \n"
+      "movl %2, %%ebx \n"
+      "subl $4, %%ebx \n"
+
+      "1: \n"
+      "movl (%%ecx,%%ebx), %%esi \n"
+
+      "cmpl $0x000000ff, %%edi \n"
+      "je   6f \n"
+
+      "pushl %%ecx \n"
+      "movl %%esi, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   2f \n"
+      "movl %%ecx, %%edi \n"
+
+      "2: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $8, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   3f \n"
+      "movl %%ecx, %%edi \n"
+
+      "3: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $16, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   4f \n"
+      "movl %%ecx, %%edi \n"
+
+      "4: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $24, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   5f \n"
+      "movl %%ecx, %%edi \n"
+
+      "5: \n"
+      "popl %%ecx \n"
+
+      "6: \n"
+      "xorl %%ebx, %%esi \n"
+      "roll $4, %%edx \n"
+      "addl %%esi, %%edx \n"
+      "subl $4, %%ebx \n"
+      "jge  1b \n"
+
+      "xorl %%eax, %%esi \n"
+      "addl %%esi, %%edx \n"
+      "addl %3, %%ecx \n"
+      "decl %%eax \n"
+      "jge  0b \n"
+
+      "movl %%edx, %4 \n"
+      "movl %%edi, %5 \n"
+
+      "popl %%edi \n"
+      "popl %%esi \n"
+      "popl %%ebx \n"
+      :
+      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet) /* NOTE(review): crc32Ret and cimaxRet are written by the asm but listed as inputs, and eax/ecx/edx are modified without being listed as clobbers */
+      : "memory", "cc"
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  *crc32 = crc32Ret; /* texture CRC */
+  *cimax = cimaxRet; /* largest byte value seen by the scan */
+
+  return 1; /* always 1, also after the catch block ran */
+}
+
+int
+TxUtil::log2(int num)
+{
+  int i = 0;
+  /* Returns the index of the highest set bit of num (floor(log2(num)) for num > 0), or 0 when num is 0. */
+#if 1
+  if (!num) return 0;
+#ifdef WIN32
+  __asm {
+    mov eax, dword ptr [num];
+    bsr eax, eax;
+    mov dword ptr [i], eax;
+  }
+#else
+  /* Portable replacement for the former x86 "bsr" inline asm, which stored
+   * its result through an operand declared as input and modified %eax
+   * without listing it as clobbered. Counting right shifts gives the same
+   * highest-set-bit index; the unsigned cast makes negative inputs behave
+   * like bsr on the raw bit pattern. */
+  unsigned int bits = (unsigned int)num;
+  while (bits >>= 1)
+    i++;
+#endif
+#else
+  switch (num) {
+    case 1:    return 0;
+    case 2:    return 1;
+    case 4:    return 2;
+    case 8:    return 3;
+    case 16:   return 4;
+    case 32:   return 5;
+    case 64:   return 6;
+    case 128:  return 7;
+    case 256:  return 8;
+    case 512:  return 9;
+    case 1024:  return 10;
+    case 2048:  return 11;
+  }
+#endif
+
+  return i;
+}
+
+int
+TxUtil::grLodLog2(int w, int h)
+{ /* log2() of the larger of the two dimensions */
+  return log2((w >= h) ? w : h);
+}
+
+int
+TxUtil::grAspectRatioLog2(int w, int h)
+{ /* log2 of the integer ratio of the sides: log2(w/h) when w >= h, -log2(h/w) otherwise; 0 when either side is 0 */
+  return (!w || !h) ? 0 : (w >= h ? log2(w/h) : -log2(h/w)); /* zero guard: the divisions would otherwise divide by zero */
+}
+
+int
+TxUtil::getNumberofProcessors()
+{
+  int numcore = 1; /* default when detection yields nothing */
+
+  /* WIN32: processor count from GetSystemInfo(); otherwise: the count field of CPUID leaf 1 (EBX bits 23..16) when the HTT flag is set. The result is capped at MAX_NUMCORE. */
+  try {
+#ifdef WIN32
+#if 1
+    /* use win32 api */
+    SYSTEM_INFO siSysInfo;
+    ZeroMemory(&siSysInfo, sizeof(SYSTEM_INFO));
+    GetSystemInfo(&siSysInfo);
+    numcore = siSysInfo.dwNumberOfProcessors;
+#else
+    __asm {
+      push ebx;
+
+      mov eax, 1;
+      cpuid;
+      test edx, 0x10000000; /* check HTT */
+      jz uniproc;
+      and ebx, 0x00ff0000;  /* mask logical core counter bit */
+      shr ebx, 16;
+      mov dword ptr [numcore], ebx;
+    uniproc:
+
+      pop ebx;
+    }
+#endif
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+
+      "movl $1, %%eax \n"
+      "cpuid \n"
+      "testl $0x10000000, %%edx \n"
+      "jz 0f \n"
+      "andl $0x00ff0000, %%ebx \n"
+      "shrl $16, %%ebx \n"
+      "movl %%ebx, %0 \n"
+      "0: \n"
+
+      "popl %%ebx \n"
+      :
+      : "m"(numcore) /* NOTE(review): numcore is written by the asm but listed as an input, and eax/ecx/edx (set by cpuid) are not listed as clobbers */
+      : "memory", "cc"
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: number of processor detection failed!\n");
+  }
+
+  if (numcore > MAX_NUMCORE) numcore = MAX_NUMCORE; /* upper bound only; there is no lower clamp */
+
+  DBG_INFO(80, L"Number of processors : %d\n", numcore);
+
+  return numcore;
+}
+
+
+/*
+ * Memory buffers for texture manipulations
+ ******************************************************************************/
+TxMemBuf::TxMemBuf()
+{
+  /* both buffer slots start out empty */
+  _tex[0] = NULL;
+  _size[0] = 0;
+  _tex[1] = NULL;
+  _size[1] = 0;
+}
+
+TxMemBuf::~TxMemBuf()
+{
+  this->shutdown(); /* releases both buffers */
+}
+
+boolean
+TxMemBuf::init(int maxwidth, int maxheight)
+{
+  /* Make sure both slots hold a buffer of maxwidth * maxheight * 4 bytes; a slot that is
+   * already allocated is kept untouched. On allocation failure everything is freed and 0 returned. */
+  for (int slot = 0; slot < 2; ++slot) {
+    if (_tex[slot] == NULL) {
+      _size[slot] = maxwidth * maxheight * 4;
+      _tex[slot] = (uint8 *)malloc(maxwidth * maxheight * 4);
+      if (_tex[slot] == NULL) {
+        shutdown();
+        return 0;
+      }
+    }
+  }
+  return 1;
+}
+
+void
+TxMemBuf::shutdown()
+{
+  /* free(NULL) is a no-op, so an empty slot needs no separate check */
+  for (int slot = 0; slot < 2; ++slot) {
+    free(_tex[slot]);
+    _tex[slot] = NULL;
+    _size[slot] = 0;
+  }
+}
+
+uint8*
+TxMemBuf::get(unsigned int num)
+{ /* buffer pointer of slot 0 or 1; NULL for any other index */
+  return (num >= 2) ? NULL : _tex[num];
+}
+
+uint32
+TxMemBuf::size_of(unsigned int num)
+{ /* recorded byte size of slot 0 or 1; 0 for any other index */
+  return (num >= 2) ? 0 : _size[num];
+}
diff --git a/Source/GlideHQ/TxUtil.h b/Source/GlideHQ/TxUtil.h
new file mode 100644
index 000000000..b89f660df
--- /dev/null
+++ b/Source/GlideHQ/TxUtil.h
@@ -0,0 +1,121 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXUTIL_H__
+#define __TXUTIL_H__
+
+/* maximum number of CPU cores allowed */
+#define MAX_NUMCORE 8
+
+#include "TxInternal.h"
+#include <string>
+
+#ifndef DXTN_DLL /* encoders are linked directly; with DXTN_DLL they are looked up at run time by TxLoadLib instead */
+#ifdef __cplusplus
+extern "C"{
+#endif
+void tx_compress_dxtn(int srccomps, int width, int height, /* DXTn encoder entry point; parameter semantics are defined by its implementation, not visible here */
+                      const void *source, int destformat, void *dest,
+                      int destRowStride);
+
+int fxt1_encode(int width, int height, int comps, /* FXT1 encoder entry point; parameter semantics are defined by its implementation, not visible here */
+                const void *source, int srcRowStride,
+                void *dest, int destRowStride);
+#ifdef __cplusplus
+}
+#endif
+#endif /* DXTN_DLL */
+
+typedef void (*dxtCompressTexFuncExt)(int srccomps, int width, /* pointer type matching the tx_compress_dxtn prototype */
+                                      int height, const void *srcPixData,
+                                      int destformat, void *dest,
+                                      int dstRowStride);
+
+typedef int (*fxtCompressTexFuncExt)(int width, int height, int comps, /* pointer type matching the fxt1_encode prototype */
+                                     const void *source, int srcRowStride,
+                                     void *dest, int destRowStride);
+
+class TxLoadLib /* singleton handing out the DXTn / FXT1 encoder function pointers */
+{
+private:
+#ifdef DXTN_DLL
+  HMODULE _dxtnlib; /* handle of the "dxtn" library loaded by the constructor */
+#endif
+  fxtCompressTexFuncExt _tx_compress_fxt1;
+  dxtCompressTexFuncExt _tx_compress_dxtn;
+  TxLoadLib(); /* private: instances are only created through getInstance() */
+public:
+  static TxLoadLib* getInstance() {
+    static TxLoadLib txLoadLib; /* static storage: members are zero before the constructor body reads them */
+    return &txLoadLib;
+  }
+  ~TxLoadLib();
+  fxtCompressTexFuncExt getfxtCompressTexFuncExt(); /* returns the stored FXT1 encoder pointer */
+  dxtCompressTexFuncExt getdxtCompressTexFuncExt(); /* returns the stored DXTn encoder pointer */
+};
+
+class TxUtil /* checksum, size and log2 helpers; definitions are in TxUtil.cpp */
+{
+private:
+  uint32 Adler32(const uint8* data, int Len, uint32 Adler); /* continues a running Adler-32 over Len bytes (zlib adler32) */
+  uint32 Adler32(const uint8* src, int width, int height, int size, int rowStride); /* row-by-row Adler-32 over a strided image, seeded with 1 */
+  uint32 RiceCRC32(const uint8* src, int width, int height, int size, int rowStride); /* CRC used for Rice hires texture packs */
+  boolean RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax); /* as RiceCRC32, also reports the largest 4-bit value scanned */
+  boolean RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax); /* as RiceCRC32, also reports the largest byte value scanned */
+  int log2(int num); /* index of the highest set bit; 0 for 0 */
+public:
+  TxUtil() { }
+  ~TxUtil() { }
+  int sizeofTx(int width, int height, uint16 format); /* byte size of a texture in the given GR_TEXFMT_* format; 0 if unsupported */
+  uint32 checksumTx(uint8 *data, int width, int height, uint16 format); /* zlib crc32 over sizeofTx() bytes; 0 if that size is 0 */
+#if 0 /* unused */
+  uint32 chkAlpha(uint32* src, int width, int height);
+#endif
+  uint32 checksum(uint8 *src, int width, int height, int size, int rowStride); /* RiceCRC32 of the texture; 0 for a NULL src */
+  uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); /* high 32 bits: palette CRC, low 32 bits: texture CRC */
+  int grLodLog2(int w, int h); /* log2 of the larger dimension */
+  int grAspectRatioLog2(int w, int h); /* log2(w/h) when w >= h, -log2(h/w) otherwise */
+  int getNumberofProcessors(); /* detected processor count, capped at MAX_NUMCORE */
+};
+
+class TxMemBuf /* singleton owning two heap buffers used for texture manipulation */
+{
+private:
+  uint8 *_tex[2]; /* the two buffers; NULL while unallocated */
+  uint32 _size[2]; /* byte size recorded for each buffer; 0 while unallocated */
+  TxMemBuf(); /* private: instances are only created through getInstance() */
+public:
+  static TxMemBuf* getInstance() {
+    static TxMemBuf txMemBuf;
+    return &txMemBuf;
+  }
+  ~TxMemBuf(); /* calls shutdown() */
+  boolean init(int maxwidth, int maxheight); /* allocates missing buffers of maxwidth * maxheight * 4 bytes; returns 0 on allocation failure */
+  void shutdown(void); /* frees both buffers and resets the recorded sizes */
+  uint8 *get(unsigned int num); /* buffer 0 or 1; NULL for other indices */
+  uint32 size_of(unsigned int num); /* recorded size of buffer 0 or 1; 0 for other indices */
+};
+
+#endif /* __TXUTIL_H__ */
diff --git a/Source/GlideHQ/bldno.h b/Source/GlideHQ/bldno.h
new file mode 100644
index 000000000..e69de29bb
diff --git a/Source/GlideHQ/tc-1.1+/dxtn.c b/Source/GlideHQ/tc-1.1+/dxtn.c
new file mode 100644
index 000000000..e2d335ae0
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/dxtn.c
@@ -0,0 +1,884 @@
+/*
+ * DXTn codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and
+ * YUV conversions to determine representative colors.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <stdio.h>
+
+#include "types.h"
+#include "internal.h"
+#include "dxtn.h"
+
+
+/***************************************************************************\
+ * DXTn encoder
+ *
+ * The encoder was built by reversing the decoder,
+ * and is vaguely based on FXT1 codec. Note that this code
+ * is merely a proof of concept, since it is highly UNoptimized!
+\***************************************************************************/
+
+
+#define MAX_COMP 4 /* ever needed maximum number of components in texel */
+#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
+#define N_TEXELS 16 /* number of texels in a block (always 16) */
+/* Pack an RGB texel into 16 bits: the top 5 bits of red land in bits
+ * 15..11, the top 6 bits of green in bits 10..5 and the top 5 bits of blue
+ * in bits 4..0. */
+#define COLOR565(v) (word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3))
+
+
+/* Translates a colour interpolation index (0 = first endpoint, highest
+ * index = second endpoint) into the 2-bit selector stored in a block.
+ * Row 0 serves the quantizers when they interpolate over indices 0..3,
+ * row 1 when they interpolate over 0..2; in the latter case selector 3 is
+ * assigned directly by the quantizer for black/transparent texels. */
+static const int dxtn_color_tlat[2][4] = {
+    { 0, 2, 3, 1 },
+    { 0, 2, 1, 3 }
+};
+
+/* Same idea for the 3-bit DXT5 alpha selectors: row 0 covers interpolation
+ * indices 0..7, row 1 covers 0..5 (selectors 6 and 7 are then assigned
+ * directly by dxt5_rgba_quantize for alpha 0 and 255). */
+static const int dxtn_alpha_tlat[2][8] = {
+    { 0, 2, 3, 4, 5, 6, 7, 1 },
+    { 0, 2, 3, 4, 5, 1, 6, 7 }
+};
+
+
+/*
+ * Quantize one 4x4 texel block into an opaque DXT1 block.
+ *
+ * cc    - output, two dwords: cc[0] = color0 | (color1 << 16) with both
+ *         endpoints packed by COLOR565, cc[1] = sixteen 2-bit selectors
+ *         with texel 0 in the lowest bits.
+ * lines - four row pointers into the source image; each pointer is
+ *         advanced past the texels read from its row.
+ * comps - bytes per source texel. A fourth component, if present, is
+ *         loaded but never examined.
+ *
+ * The endpoints are the texels with the smallest and the largest
+ * brightness (plain component sum, or a luma-weighted sum when YUV is
+ * defined).
+ */
+static void
+dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];   /* interpolation vector */
+
+    dword hi; /* high doubleword */
+    int color0, color1;
+    int n_vect;
+    const int n_comp = 3;
+    int black = 0;
+
+#ifndef YUV
+    int minSum = 2000; /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1; /* small enough */
+    int minCol = 0; /* phoudoin: silent compiler! */
+    int maxCol = 0; /* phoudoin: silent compiler! */
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    /* make the whole block opaque */
+    /* we will NEVER reference ACOMP of any pixel */
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    /* the first three source bytes are stored into components 2, 1, 0 */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+          input[k + l * 4][2] = *lines[l]++;
+          input[k + l * 4][1] = *lines[l]++;
+          input[k + l * 4][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        /* RGB to YUV conversion according to CCIR 601 specs
+         * Y = 0.299R+0.587G+0.114B
+         * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
+         * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
+         */
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+	/* a zero sum means all three colour components are zero */
+	if (sum == 0) {
+	    black = 1;
+	}
+    }
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+    if (color0 == color1) {
+	/* we'll use 3-vector */
+	/* every selector is 3 (decoded as black) when a black texel was
+	 * seen, otherwise every selector is 0 (color0) */
+	cc[0] = color0 | (color1 << 16);
+	hi = black ? -1 : 0;
+    } else {
+	/* NOTE(review): when black is set the darkest texel has sum 0, so
+	 * color0 is 0 here and this test appears to always clear the flag,
+	 * leaving the 3-colour path below unused -- confirm. */
+	if (black && ((color0 == 0) || (color1 == 0))) {
+	    /* we still can use 4-vector */
+	    black = 0;
+	}
+
+	/* order the endpoints: color0 < color1 when selector 3 must mean
+	 * black, color0 > color1 otherwise */
+	if (black ^ (color0 <= color1)) {
+	    int aux;
+	    aux = color0;
+	    color0 = color1;
+	    color1 = aux;
+	    aux = minCol;
+	    minCol = maxCol;
+	    maxCol = aux;
+	}
+	n_vect = (color0 <= color1) ? 2 : 3;
+
+	/* MAKEIVEC/CALCCDOT are defined outside this file; presumably they
+	 * build the interpolation vector between the two endpoint texels
+	 * and project a texel onto it to get an index in 0..n_vect --
+	 * TODO confirm. */
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	cc[0] = color0 | (color1 << 16);
+	hi = 0;
+	/* iterate from texel 15 down so texel 0 ends in the lowest bits */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel = 3;
+	    int sum = 0;
+	    if (black) {
+		for (i = 0; i < n_comp; i++) {
+		    sum += input[k][i];
+		}
+	    }
+	    if (!black || sum) {
+		/* interpolate color */
+		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		texel = dxtn_color_tlat[black][texel];
+	    }
+	    /* add in texel */
+	    hi <<= 2;
+	    hi |= texel;
+	}
+    }
+    cc[1] = hi;
+}
+
+
+/*
+ * Quantize one 4x4 texel block into a DXT1 block with 1-bit alpha.
+ *
+ * cc    - output, two dwords: cc[0] = color0 | (color1 << 16) with both
+ *         endpoints packed by COLOR565, cc[1] = sixteen 2-bit selectors
+ *         with texel 0 in the lowest bits.
+ * lines - four row pointers into the source image; each pointer is
+ *         advanced past the texels read from its row.
+ * comps - bytes per source texel; with comps == 3 every texel is treated
+ *         as fully opaque.
+ *
+ * A texel counts as transparent when its alpha is below 128. If the block
+ * has any transparent texel the endpoints are stored with
+ * color0 <= color1, the layout in which dxt1_rgba_decode_1 treats
+ * selector 3 as transparent.
+ */
+static void
+dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];	/* interpolation vector */
+
+    dword hi;		/* high doubleword */
+    int color0, color1;
+    int n_vect;
+    const int n_comp = 3;
+    int transparent = 0;
+
+#ifndef YUV
+    int minSum = 2000;          /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1;		/* small enough */
+    int minCol = 0;		/* phoudoin: silent compiler! */
+    int maxCol = 0;		/* phoudoin: silent compiler! */
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+          input[k + l * 4][2] = *lines[l]++;
+          input[k + l * 4][1] = *lines[l]++;
+          input[k + l * 4][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+	if (input[k][ACOMP] < 128) {
+	    transparent = 1;
+	}
+    }
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+    if (color0 == color1) {
+	/* Equal endpoints put the block in the color0 <= color1 layout:
+	 * selector 0 decodes to the opaque colour and selector 3 to a
+	 * transparent texel. Choose per texel, so that one transparent
+	 * texel no longer turns the whole block transparent. */
+	cc[0] = color0 | (color1 << 16);
+	hi = 0;
+	if (transparent) {
+	    for (k = N_TEXELS - 1; k >= 0; k--) {
+		hi <<= 2;
+		if (input[k][ACOMP] < 128) {
+		    hi |= 3;
+		}
+	    }
+	}
+    } else {
+	/* order the endpoints: color0 < color1 when selector 3 must mean
+	 * transparent, color0 > color1 otherwise */
+	if (transparent ^ (color0 <= color1)) {
+	    int aux;
+	    aux = color0;
+	    color0 = color1;
+	    color1 = aux;
+	    aux = minCol;
+	    minCol = maxCol;
+	    maxCol = aux;
+	}
+	n_vect = (color0 <= color1) ? 2 : 3;
+
+	/* MAKEIVEC/CALCCDOT are defined outside this file; presumably they
+	 * build the interpolation vector between the two endpoint texels
+	 * and project a texel onto it to get an index in 0..n_vect --
+	 * TODO confirm. */
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	cc[0] = color0 | (color1 << 16);
+	hi = 0;
+	/* iterate from texel 15 down so texel 0 ends in the lowest bits */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel = 3;
+	    if (input[k][ACOMP] >= 128) {
+		/* interpolate color */
+		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		texel = dxtn_color_tlat[transparent][texel];
+	    }
+	    /* add in texel */
+	    hi <<= 2;
+	    hi |= texel;
+	}
+    }
+    cc[1] = hi;
+}
+
+
+/*
+ * Quantize one 4x4 texel block into a DXT3 block (explicit 4-bit alpha).
+ *
+ * cc    - output, four dwords:
+ *         cc[0] = 4-bit alphas of texels 0..7, texel 0 in the low nibble,
+ *         cc[1] = 4-bit alphas of texels 8..15, texel 8 in the low nibble,
+ *         cc[2] = color0 | (color1 << 16), endpoints packed by COLOR565,
+ *         cc[3] = sixteen 2-bit colour selectors, texel 0 in the low bits.
+ * lines - four row pointers into the source image; each pointer is
+ *         advanced past the texels read from its row.
+ * comps - bytes per source texel; with comps == 3 every alpha is 255.
+ *
+ * Each stored alpha is the top four bits of the texel's alpha byte.
+ */
+static void
+dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];	/* interpolation vector */
+
+    dword lolo, lohi;	/* low quadword: lo dword, hi dword */
+    dword hihi;		/* high quadword: high dword */
+    int color0, color1;
+    const int n_vect = 3;
+    const int n_comp = 3;
+
+#ifndef YUV
+    int minSum = 2000;          /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1;		/* small enough */
+    int minCol = 0;		/* phoudoin: silent compiler! */
+    int maxCol = 0;		/* phoudoin: silent compiler! */
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+          input[k + l * 4][2] = *lines[l]++;
+          input[k + l * 4][1] = *lines[l]++;
+          input[k + l * 4][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+    }
+
+    /* add in alphas */
+    lolo = lohi = 0;
+    /* texels 15..8 go into the second dword */
+    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	/* add in alpha */
+	lohi <<= 4;
+	lohi |= input[k][ACOMP] >> 4;
+    }
+    cc[1] = lohi;
+    /* k continues from 7 down to 0 for the first dword */
+    for (; k >= 0; k--) {
+	/* add in alpha */
+	lolo <<= 4;
+	lolo |= input[k][ACOMP] >> 4;
+    }
+    cc[0] = lolo;
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+#ifdef RADEON
+    /* H.Morii - Workaround for ATI Radeon
+     * According to the OpenGL EXT_texture_compression_s3tc specs,
+     * the encoding of the RGB components for DXT3 and DXT5 formats
+     * use the non-transparent encodings of DXT1 but treated as
+     * though color0 > color1, regardless of the actual values of
+     * color0 and color1. ATI Radeons however require the values to
+     * be color0 > color1.
+     */
+    if (color0 < color1) {
+	int aux;
+	aux = color0;
+	color0 = color1;
+	color1 = aux;
+	aux = minCol;
+	minCol = maxCol;
+	maxCol = aux;
+    }
+#endif
+
+    cc[2] = color0 | (color1 << 16);
+
+    /* with identical endpoints every selector stays 0 (color0) */
+    hihi = 0;
+    if (color0 != color1) {
+	/* MAKEIVEC/CALCCDOT are defined outside this file; presumably they
+	 * build the interpolation vector between the two endpoint texels
+	 * and project a texel onto it to get an index in 0..n_vect --
+	 * TODO confirm. */
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    texel = dxtn_color_tlat[0][texel];
+	    /* add in texel */
+	    hihi <<= 2;
+	    hihi |= texel;
+	}
+    }
+    cc[3] = hihi;
+}
+
+
+/*
+ * Quantize one 4x4 texel block into a DXT5 block (interpolated alpha).
+ *
+ * cc    - output, four dwords:
+ *         cc[0..1] = alpha part: alpha0 in the lowest byte, alpha1 in the
+ *                    next byte, followed by the 3-bit alpha selectors,
+ *         cc[2]    = color0 | (color1 << 16), endpoints packed by COLOR565,
+ *         cc[3]    = sixteen 2-bit colour selectors, texel 0 in the low
+ *                    bits.
+ * lines - four row pointers into the source image; each pointer is
+ *         advanced past the texels read from its row.
+ * comps - bytes per source texel; with comps == 3 every alpha is 255.
+ *
+ * The colour part works like dxt3_rgba_quantize; the alpha endpoints are
+ * the smallest and largest alpha found in the block.
+ */
+static void
+dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];	/* interpolation vector */
+
+    qword lo;			/* low quadword */
+    dword hihi;		/* high quadword: high dword */
+    int color0, color1;
+    const int n_vect = 3;
+    const int n_comp = 3;
+
+#ifndef YUV
+    int minSum = 2000;          /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1;		/* small enough */
+    int minCol = 0;		/* phoudoin: silent compiler! */
+    int maxCol = 0;		/* phoudoin: silent compiler! */
+    int alpha0 = 2000;		/* big enough */
+    int alpha1 = -1;		/* small enough */
+    int anyZero = 0, anyOne = 0;
+    int a_vect;
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+          input[k + l * 4][2] = *lines[l]++;
+          input[k + l * 4][1] = *lines[l]++;
+          input[k + l * 4][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+	/* track the smallest (alpha0) and largest (alpha1) alpha */
+	if (alpha0 > input[k][ACOMP]) {
+	    alpha0 = input[k][ACOMP];
+	}
+	if (alpha1 < input[k][ACOMP]) {
+	    alpha1 = input[k][ACOMP];
+	}
+	if (input[k][ACOMP] == 0) {
+	    anyZero = 1;
+	}
+	if (input[k][ACOMP] == 255) {
+	    anyOne = 1;
+	}
+    }
+
+    /* add in alphas */
+    if (alpha0 == alpha1) {
+	/* we'll use 6-vector */
+	/* all selectors are 0, so every texel decodes to alpha0 */
+	cc[0] = alpha0 | (alpha1 << 8);
+	cc[1] = 0;
+    } else {
+	/* NOTE(review): alpha0/alpha1 are the block minimum/maximum at
+	 * this point, so a zero alpha implies alpha0 == 0 and a 255 alpha
+	 * implies alpha1 == 255; both flags therefore appear to always be
+	 * cleared here, leaving the 6-value mode unused -- confirm. */
+	if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) {
+	    /* we still might use 8-vector */
+	    anyZero = 0;
+	}
+	if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) {
+	    /* we still might use 8-vector */
+	    anyOne = 0;
+	}
+	/* order the endpoints: alpha0 < alpha1 when selectors 6 and 7 are
+	 * reserved for 0 and 255, alpha0 > alpha1 otherwise */
+	if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) {
+	    int aux;
+	    aux = alpha0;
+	    alpha0 = alpha1;
+	    alpha1 = aux;
+	}
+	a_vect = (alpha0 <= alpha1) ? 5 : 7;
+
+	/* compute interpolation vector */
+	/* maps alpha0 to index 0 and alpha1 to index a_vect; the 0.5 in b
+	 * rounds to nearest when the result is truncated below */
+	iv[ACOMP] = (float)a_vect / (alpha1 - alpha0);
+	b = -iv[ACOMP] * alpha0 + 0.5F;
+
+	/* add in alphas */
+	/* Q_MOV32/Q_SHL/Q_OR32 are defined outside this file; presumably
+	 * they assign, shift left and OR a 32-bit value into the 64-bit
+	 * accumulator lo -- TODO confirm. */
+	Q_MOV32(lo, 0);
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel = -1;
+	    if (anyZero | anyOne) {
+		if (input[k][ACOMP] == 0) {
+		    texel = 6;
+		} else if (input[k][ACOMP] == 255) {
+		    texel = 7;
+		}
+	    }
+	    /* interpolate alpha */
+	    if (texel == -1) {
+		float dot = input[k][ACOMP] * iv[ACOMP];
+		texel = (int)(dot + b);
+#if SAFECDOT
+		if (texel < 0) {
+		    texel = 0;
+		} else if (texel > a_vect) {
+		    texel = a_vect;
+		}
+#endif
+		texel = dxtn_alpha_tlat[anyZero | anyOne][texel];
+	    }
+	    /* add in texel */
+	    Q_SHL(lo, 3);
+	    Q_OR32(lo, texel);
+	}
+	/* make room for, then insert, the two endpoint bytes */
+	Q_SHL(lo, 16);
+	Q_OR32(lo, alpha0 | (alpha1 << 8));
+	((qword *)cc)[0] = lo;
+    }
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */
+    if (color0 < color1) {
+	int aux;
+	aux = color0;
+	color0 = color1;
+	color1 = aux;
+	aux = minCol;
+	minCol = maxCol;
+	maxCol = aux;
+    }
+#endif
+
+    cc[2] = color0 | (color1 << 16);
+
+    /* with identical endpoints every selector stays 0 (color0) */
+    hihi = 0;
+    if (color0 != color1) {
+	/* MAKEIVEC/CALCCDOT are defined outside this file; presumably they
+	 * build the interpolation vector between the two endpoint texels
+	 * and project a texel onto it to get an index in 0..n_vect --
+	 * TODO confirm. */
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    texel = dxtn_color_tlat[0][texel];
+	    /* add in texel */
+	    hihi <<= 2;
+	    hihi |= texel;
+	}
+    }
+    cc[3] = hihi;
+}
+
+
+/*
+ * ENCODER(dxtn, n) defines the public function dxtn##_encode(), which
+ * compresses a whole image by calling dxtn##_quantize() on every 4x4
+ * block. n is the number of dwords each compressed block occupies.
+ *
+ * width, height - image size in texels; when either is not a multiple of
+ *                 four the image is first copied into a temporary buffer
+ *                 whose dimensions are rounded up to a multiple of four.
+ * comps         - bytes per source texel.
+ * source        - source texels; srcRowStride is the byte distance
+ *                 between source rows.
+ * dest          - receives the compressed blocks; destRowStride is the
+ *                 byte distance between rows of blocks.
+ *
+ * Returns 0 on success, or -1 when the temporary buffer cannot be
+ * allocated (dest is left untouched in that case).
+ */
+#define ENCODER(dxtn, n)						\
+int TAPIENTRY								\
+dxtn##_encode (int width, int height, int comps,			\
+	       const void *source, int srcRowStride,			\
+	       void *dest, int destRowStride)				\
+{									\
+    int x, y;								\
+    const byte *data;							\
+    dword *encoded = (dword *)dest;					\
+    void *newSource = NULL;						\
+									\
+    /* Replicate image if width is not M4 or height is not M4 */	\
+    if ((width & 3) | (height & 3)) {					\
+	int newWidth = (width + 3) & ~3;				\
+	int newHeight = (height + 3) & ~3;				\
+	/* one byte per component, matching srcRowStride below */	\
+	newSource = malloc((size_t)comps * newWidth * newHeight		\
+			   * sizeof(byte));				\
+	if (newSource == NULL) {					\
+	    return -1;							\
+	}								\
+	_mesa_upscale_teximage2d(width, height, newWidth, newHeight,	\
+                               comps, (const byte *)source,		\
+			       srcRowStride, (byte *)newSource);	\
+	source = newSource;						\
+	width = newWidth;						\
+	height = newHeight;						\
+	srcRowStride = comps * newWidth;				\
+    }									\
+									\
+    data = (const byte *)source;					\
+    /* dwords to skip after each row of blocks */			\
+    destRowStride = (destRowStride - width * n) / 4;			\
+    for (y = 0; y < height; y += 4) {					\
+	unsigned int offs = 0 + (y + 0) * srcRowStride;			\
+	for (x = 0; x < width; x += 4) {				\
+	    const byte *lines[4];					\
+	    lines[0] = &data[offs];					\
+	    lines[1] = lines[0] + srcRowStride;				\
+	    lines[2] = lines[1] + srcRowStride;				\
+	    lines[3] = lines[2] + srcRowStride;				\
+	    offs += 4 * comps;						\
+	    dxtn##_quantize(encoded, lines, comps);			\
+	    /* 4x4 block */						\
+	    encoded += n;						\
+	}								\
+	encoded += destRowStride;					\
+    }									\
+									\
+    /* free(NULL) is a no-op, so no guard is needed */			\
+    free(newSource);							\
+									\
+    return 0;								\
+}
+
+ENCODER(dxt1_rgb,  2)
+ENCODER(dxt1_rgba, 2)
+ENCODER(dxt3_rgba, 4)
+ENCODER(dxt5_rgba, 4)
+
+
+/***************************************************************************\
+ * DXTn decoder
+ *
+ * The decoder is based on GL_EXT_texture_compression_s3tc
+ * specification and serves as a concept for the encoder.
+\***************************************************************************/
+
+
+/* lookup table for scaling 4 bit colors up to 8 bits */
+static const byte _rgb_scale_4[] = {
+    0,   17,  34,  51,  68,  85,  102, 119,
+    136, 153, 170, 187, 204, 221, 238, 255
+};
+
+/* lookup table for scaling 5 bit colors up to 8 bits */
+static const byte _rgb_scale_5[] = {
+    0,   8,   16,  25,  33,  41,  49,  58,
+    66,  74,  82,  90,  99,  107, 115, 123,
+    132, 140, 148, 156, 165, 173, 181, 189,
+    197, 206, 214, 222, 230, 239, 247, 255
+};
+
+/* lookup table for scaling 6 bit colors up to 8 bits */
+static const byte _rgb_scale_6[] = {
+    0,   4,   8,   12,  16,  20,  24,  28,
+    32,  36,  40,  45,  49,  53,  57,  61,
+    65,  69,  73,  77,  81,  85,  89,  93,
+    97,  101, 105, 109, 113, 117, 121, 125,
+    130, 134, 138, 142, 146, 150, 154, 158,
+    162, 166, 170, 174, 178, 182, 186, 190,
+    194, 198, 202, 206, 210, 215, 219, 223,
+    227, 231, 235, 239, 243, 247, 251, 255
+};
+
+
+/* CC_SEL: view cc as an array of dwords, pick dword number which / 32 and
+ * shift it right by which & 31. The result is not masked; callers mask it
+ * (the UPn macros below do so themselves). */
+#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
+/* UP4/UP5/UP6: mask c to 4, 5 or 6 bits and scale it to 8 bits through
+ * the tables above. */
+#define UP4(c) _rgb_scale_4[(c) & 15]
+#define UP5(c) _rgb_scale_5[(c) & 31]
+#define UP6(c) _rgb_scale_6[(c) & 63]
+/* ZERO_4UBV: store a single zero dword through v (clears four bytes,
+ * assuming dword is 32 bits wide). */
+#define ZERO_4UBV(v) *((dword *)(v)) = 0
+
+
+/*
+ * Decode the single texel (i, j) of a DXT1 texture as opaque RGB.
+ *
+ * texture - start of the compressed data (8 bytes per 4x4 block).
+ * stride  - texture width in pixels.
+ * i, j    - texel column and row.
+ * rgba    - receives four bytes; the alpha component is always 255.
+ */
+void TAPIENTRY
+dxt1_rgb_decode_1 (const void *texture, int stride,
+		   int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *blk = (const byte *)texture
+		       + ((j / 4) * blocksPerRow + i / 4) * 8;
+    /* one selector byte per block row, two bits per texel column */
+    const int sel = (blk[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+
+    switch (sel) {
+    case 0:
+	/* first endpoint as stored */
+	rgba[RCOMP] = UP5(CC_SEL(blk, 11));
+	rgba[GCOMP] = UP6(CC_SEL(blk,  5));
+	rgba[BCOMP] = UP5(CC_SEL(blk,  0));
+	break;
+    case 1:
+	/* second endpoint as stored */
+	rgba[RCOMP] = UP5(CC_SEL(blk, 27));
+	rgba[GCOMP] = UP6(CC_SEL(blk, 21));
+	rgba[BCOMP] = UP5(CC_SEL(blk, 16));
+	break;
+    default: {
+	const word c0 = blk[0] | (blk[1] << 8);
+	const word c1 = blk[2] | (blk[3] << 8);
+	const int r0 = UP5(c0 >> 11), r1 = UP5(c1 >> 11);
+	const int g0 = UP6(c0 >>  5), g1 = UP6(c1 >>  5);
+	const int b0 = UP5(c0      ), b1 = UP5(c1      );
+
+	if (c0 > c1) {
+	    /* four-colour block: thirds between the endpoints */
+	    if (sel == 2) {
+		rgba[RCOMP] = (r0 * 2 + r1) / 3;
+		rgba[GCOMP] = (g0 * 2 + g1) / 3;
+		rgba[BCOMP] = (b0 * 2 + b1) / 3;
+	    } else {
+		rgba[RCOMP] = (r0 + 2 * r1) / 3;
+		rgba[GCOMP] = (g0 + 2 * g1) / 3;
+		rgba[BCOMP] = (b0 + 2 * b1) / 3;
+	    }
+	} else if (sel == 2) {
+	    /* three-colour block: midpoint of the endpoints */
+	    rgba[RCOMP] = (r0 + r1) / 2;
+	    rgba[GCOMP] = (g0 + g1) / 2;
+	    rgba[BCOMP] = (b0 + b1) / 2;
+	} else {
+	    /* three-colour block, selector 3: black */
+	    ZERO_4UBV(rgba);
+	}
+	break;
+    }
+    }
+
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Decode the single texel (i, j) of a DXT1 texture with 1-bit alpha.
+ *
+ * texture - start of the compressed data (8 bytes per 4x4 block).
+ * stride  - texture width in pixels.
+ * i, j    - texel column and row.
+ * rgba    - receives four bytes. Alpha is 255 except for selector 3 in a
+ *           block whose first endpoint is not greater than the second;
+ *           that texel is returned with all four bytes zero.
+ */
+void TAPIENTRY
+dxt1_rgba_decode_1 (const void *texture, int stride,
+		    int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *blk = (const byte *)texture
+		       + ((j / 4) * blocksPerRow + i / 4) * 8;
+    /* one selector byte per block row, two bits per texel column */
+    const int sel = (blk[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+
+    if (sel == 0) {
+	/* first endpoint as stored */
+	rgba[RCOMP] = UP5(CC_SEL(blk, 11));
+	rgba[GCOMP] = UP6(CC_SEL(blk,  5));
+	rgba[BCOMP] = UP5(CC_SEL(blk,  0));
+    } else if (sel == 1) {
+	/* second endpoint as stored */
+	rgba[RCOMP] = UP5(CC_SEL(blk, 27));
+	rgba[GCOMP] = UP6(CC_SEL(blk, 21));
+	rgba[BCOMP] = UP5(CC_SEL(blk, 16));
+    } else {
+	const word c0 = blk[0] | (blk[1] << 8);
+	const word c1 = blk[2] | (blk[3] << 8);
+	const int r0 = UP5(c0 >> 11), r1 = UP5(c1 >> 11);
+	const int g0 = UP6(c0 >>  5), g1 = UP6(c1 >>  5);
+	const int b0 = UP5(c0      ), b1 = UP5(c1      );
+	const int fourColours = (c0 > c1);
+
+	if (!fourColours && sel == 3) {
+	    /* transparent texel: all four bytes are cleared */
+	    ZERO_4UBV(rgba);
+	    return;
+	}
+
+	if (!fourColours) {
+	    /* three-colour block, selector 2: midpoint */
+	    rgba[RCOMP] = (r0 + r1) / 2;
+	    rgba[GCOMP] = (g0 + g1) / 2;
+	    rgba[BCOMP] = (b0 + b1) / 2;
+	} else if (sel == 2) {
+	    rgba[RCOMP] = (r0 * 2 + r1) / 3;
+	    rgba[GCOMP] = (g0 * 2 + g1) / 3;
+	    rgba[BCOMP] = (b0 * 2 + b1) / 3;
+	} else {
+	    rgba[RCOMP] = (r0 + 2 * r1) / 3;
+	    rgba[GCOMP] = (g0 + 2 * g1) / 3;
+	    rgba[BCOMP] = (b0 + 2 * b1) / 3;
+	}
+    }
+
+    /* every path that reaches this point yields an opaque texel */
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Decode the single texel (i, j) of a DXT3 texture.
+ *
+ * texture - start of the compressed data (16 bytes per 4x4 block: eight
+ *           bytes of 4-bit alphas followed by the colour part).
+ * stride  - texture width in pixels.
+ * i, j    - texel column and row.
+ * rgba    - receives the four decoded bytes.
+ */
+void TAPIENTRY
+dxt3_rgba_decode_1 (const void *texture, int stride,
+		    int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *blk = (const byte *)texture
+		       + ((j / 4) * blocksPerRow + i / 4) * 16;
+    const int sel = (blk[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+    const dword *cc = (const dword *)(blk + 8);
+    /* both endpoints, expanded to 8 bits per channel */
+    const int r0 = UP5(CC_SEL(cc, 11)), r1 = UP5(CC_SEL(cc, 27));
+    const int g0 = UP6(CC_SEL(cc,  5)), g1 = UP6(CC_SEL(cc, 21));
+    const int b0 = UP5(CC_SEL(cc,  0)), b1 = UP5(CC_SEL(cc, 16));
+    /* index of the texel inside its block, row by row */
+    const int texel = (j & 3) * 4 + (i & 3);
+
+    switch (sel) {
+    case 0:
+	rgba[RCOMP] = r0;
+	rgba[GCOMP] = g0;
+	rgba[BCOMP] = b0;
+	break;
+    case 1:
+	rgba[RCOMP] = r1;
+	rgba[GCOMP] = g1;
+	rgba[BCOMP] = b1;
+	break;
+    case 2:
+	/* two thirds of the first endpoint, one third of the second */
+	rgba[RCOMP] = (r0 * 2 + r1) / 3;
+	rgba[GCOMP] = (g0 * 2 + g1) / 3;
+	rgba[BCOMP] = (b0 * 2 + b1) / 3;
+	break;
+    default:
+	/* one third of the first endpoint, two thirds of the second */
+	rgba[RCOMP] = (r0 + 2 * r1) / 3;
+	rgba[GCOMP] = (g0 + 2 * g1) / 3;
+	rgba[BCOMP] = (b0 + 2 * b1) / 3;
+	break;
+    }
+
+    /* two alphas per byte: even columns in the low nibble, odd in the high */
+    rgba[ACOMP] = UP4(blk[texel / 2] >> ((i & 1) * 4));
+}
+
+
+/*
+ * Decode the single texel (i, j) of a DXT5 texture.
+ *
+ * texture - start of the compressed data (16 bytes per 4x4 block: two
+ *           alpha endpoints, sixteen 3-bit alpha selectors, then the
+ *           colour part).
+ * stride  - texture width in pixels.
+ * i, j    - texel column and row.
+ * rgba    - receives the four decoded bytes.
+ */
+void TAPIENTRY
+dxt5_rgba_decode_1 (const void *texture, int stride,
+		    int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *blk = (const byte *)texture
+		       + ((j / 4) * blocksPerRow + i / 4) * 16;
+    const int sel = (blk[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+    const dword *cc = (const dword *)(blk + 8);
+    const byte a0 = blk[0];
+    const byte a1 = blk[1];
+    /* bit position of this texel's alpha selector inside the block */
+    const int aShift = (((j & 3) * 4) + (i & 3)) * 3 + 16;
+    /* both colour endpoints, expanded to 8 bits per channel */
+    const int r0 = UP5(CC_SEL(cc, 11)), r1 = UP5(CC_SEL(cc, 27));
+    const int g0 = UP6(CC_SEL(cc,  5)), g1 = UP6(CC_SEL(cc, 21));
+    const int b0 = UP5(CC_SEL(cc,  0)), b1 = UP5(CC_SEL(cc, 16));
+    dword aBits;
+    int aSel;
+
+    /* the selector at bit 31 straddles two dwords, so it is read through
+     * a view that starts two bytes later */
+    if (aShift == 31) {
+	aBits = CC_SEL(blk + 2, aShift - 16);
+    } else {
+	aBits = CC_SEL(blk, aShift);
+    }
+    aSel = aBits & 0x7;
+
+    switch (sel) {
+    case 0:
+	rgba[RCOMP] = r0;
+	rgba[GCOMP] = g0;
+	rgba[BCOMP] = b0;
+	break;
+    case 1:
+	rgba[RCOMP] = r1;
+	rgba[GCOMP] = g1;
+	rgba[BCOMP] = b1;
+	break;
+    case 2:
+	/* two thirds of the first endpoint, one third of the second */
+	rgba[RCOMP] = (r0 * 2 + r1) / 3;
+	rgba[GCOMP] = (g0 * 2 + g1) / 3;
+	rgba[BCOMP] = (b0 * 2 + b1) / 3;
+	break;
+    default:
+	/* one third of the first endpoint, two thirds of the second */
+	rgba[RCOMP] = (r0 + 2 * r1) / 3;
+	rgba[GCOMP] = (g0 + 2 * g1) / 3;
+	rgba[BCOMP] = (b0 + 2 * b1) / 3;
+	break;
+    }
+
+    if (aSel < 2) {
+	/* selectors 0 and 1 are the stored endpoints */
+	rgba[ACOMP] = (aSel == 0) ? a0 : a1;
+    } else if (a0 > a1) {
+	/* eight-value mode: six interpolated steps */
+	rgba[ACOMP] = ((8 - aSel) * a0 + (aSel - 1) * a1) / 7;
+    } else {
+	/* six-value mode: four interpolated steps plus 0 and 255 */
+	switch (aSel) {
+	case 6:
+	    rgba[ACOMP] = 0;
+	    break;
+	case 7:
+	    rgba[ACOMP] = 255;
+	    break;
+	default:
+	    rgba[ACOMP] = ((6 - aSel) * a0 + (aSel - 1) * a1) / 5;
+	    break;
+	}
+    }
+}
diff --git a/Source/GlideHQ/tc-1.1+/dxtn.h b/Source/GlideHQ/tc-1.1+/dxtn.h
new file mode 100644
index 000000000..4078fd9f9
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/dxtn.h
@@ -0,0 +1,62 @@
+/*
+ * DXTn codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+
+#ifndef DXTN_H_included
+#define DXTN_H_included
+
+/*
+ * Encoders. Each compresses a width x height image made of `comps` bytes
+ * per texel into `dest`, one block per 4x4 texels. srcRowStride and
+ * destRowStride are byte distances between source rows and between rows
+ * of compressed blocks. Images whose dimensions are not multiples of four
+ * are padded internally before encoding. The functions return 0 on
+ * success.
+ */
+
+/* DXT1 without alpha. */
+TAPI int TAPIENTRY
+dxt1_rgb_encode (int width, int height, int comps,
+		 const void *source, int srcRowStride,
+		 void *dest, int destRowStride);
+
+/* DXT1 with 1-bit alpha (texels with alpha below 128 are transparent). */
+TAPI int TAPIENTRY
+dxt1_rgba_encode (int width, int height, int comps,
+		  const void *source, int srcRowStride,
+		  void *dest, int destRowStride);
+
+/* DXT3: explicit 4-bit alpha per texel. */
+TAPI int TAPIENTRY
+dxt3_rgba_encode (int width, int height, int comps,
+		  const void *source, int srcRowStride,
+		  void *dest, int destRowStride);
+
+/* DXT5: interpolated alpha. */
+TAPI int TAPIENTRY
+dxt5_rgba_encode (int width, int height, int comps,
+		  const void *source, int srcRowStride,
+		  void *dest, int destRowStride);
+
+/*
+ * Single-texel decoders. Each reads texel (i, j) = (column, row) from the
+ * compressed `texture` and writes four bytes to `rgba`.
+ */
+
+/* DXT1 as opaque RGB: alpha is always written as 255. */
+TAPI void TAPIENTRY
+dxt1_rgb_decode_1 (const void *texture, int stride /* in pixels */,
+		   int i, int j, byte *rgba);
+
+/* DXT1 with 1-bit alpha: transparent texels come back as four zero bytes. */
+TAPI void TAPIENTRY
+dxt1_rgba_decode_1 (const void *texture, int stride /* in pixels */,
+		    int i, int j, byte *rgba);
+
+/* DXT3. */
+TAPI void TAPIENTRY
+dxt3_rgba_decode_1 (const void *texture, int stride /* in pixels */,
+		    int i, int j, byte *rgba);
+
+/* DXT5. */
+TAPI void TAPIENTRY
+dxt5_rgba_decode_1 (const void *texture, int stride /* in pixels */,
+		    int i, int j, byte *rgba);
+
+#endif
diff --git a/Source/GlideHQ/tc-1.1+/fxt1.c b/Source/GlideHQ/tc-1.1+/fxt1.c
new file mode 100644
index 000000000..1287ced53
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/fxt1.c
@@ -0,0 +1,1459 @@
+/*
+ * FXT1 codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Added support for ARGB inputs.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "internal.h"
+#include "fxt1.h"
+
+
+/***************************************************************************\
+ * FXT1 encoder
+ *
+ * The encoder was built by reversing the decoder,
+ * and is vaguely based on Texus2 by 3dfx. Note that this code
+ * is merely a proof of concept, since it is highly UNoptimized;
+ * moreover, it is sub-optimal due to initial conditions passed
+ * to Lloyd's algorithm (the interpolation modes are even worse).
+\***************************************************************************/
+
+
+#define MAX_COMP 4 /* ever needed maximum number of components in texel */
+#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
+#define N_TEXELS 32 /* number of texels in a block (always 32) */
+#define LL_N_REP 50 /* number of iterations in lloyd's vq */
+#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
+#define LL_RMS_E 255 /* fault tolerance (maximum error) */
+#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
+#define ISTBLACK(v) (*((dword *)(v)) == 0) /* true when the dword at v is 0, i.e. every byte of the texel is 0 (assumes dword spans the 4 components) */
+#define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC)) /* copy one texel from SRC to DST as a single dword; expands to a bare assignment */
+
+/* Index of the row of vec (nv rows, first nc components compared) whose squared distance to
+ * input is smallest; the earliest row wins ties. -1 if no row is closer than 1e9 (e.g. nv <= 0). */
+static int
+fxt1_bestcol (float vec[][MAX_COMP], int nv,
+	      byte input[MAX_COMP], int nc)
+{
+    int winner = -1, cand, c;
+    float lowest = 1e9; /* sentinel the first real distance is expected to beat */
+    for (cand = 0; cand < nv; cand++) {
+	float dist = 0.0F;
+	for (c = 0; c < nc; c++) {
+	    dist += (vec[cand][c] - input[c]) * (vec[cand][c] - input[c]);
+	}
+	if (!(dist < lowest)) {
+	    continue; /* keep the earlier, at-least-as-close row */
+	}
+	lowest = dist;
+	winner = cand;
+    }
+    return winner;
+}
+
+/* Index of the texel among the first n of input whose squared distance to vec (over nc
+ * components) is largest; the earliest texel wins ties. -1 when n <= 0. */
+static int
+fxt1_worst (float vec[MAX_COMP],
+	    byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+    int farthest = -1, texel, c;
+    float highest = -1.0F; /* sentinel below every squared distance */
+    for (texel = 0; texel < n; texel++) {
+	float dist = 0.0F;
+	for (c = 0; c < nc; c++) {
+	    dist += (vec[c] - input[texel][c]) * (vec[c] - input[texel][c]);
+	}
+	if (!(dist > highest)) {
+	    continue; /* not farther than the current pick */
+	}
+	highest = dist;
+	farthest = texel;
+    }
+    return farthest;
+}
+
+/* Compute, for each of the first nc components, the variance E[x^2] - E[x]^2 over the first
+ * n texels of input, and return the index of the component with the largest variance (the
+ * first one on ties, 0 when nc <= 0). When variance is non-null every value is stored in it. */
+static int
+fxt1_variance (double variance[MAX_COMP],
+	       byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+    const double inv_n = 1.0 / n;
+    double top = -1; /* sentinel below any variance computed here */
+    int widest = 0, comp, texel;
+
+    for (comp = 0; comp < nc; comp++) {
+	dword total = 0, total_sq = 0;
+	double var;
+	for (texel = 0; texel < n; texel++) {
+	    int v = input[texel][comp];
+	    total += v;
+	    total_sq += v * v;
+	}
+	var = total_sq * inv_n - total * total * inv_n * inv_n;
+	if (var > top) {
+	    top = var;
+	    widest = comp;
+	}
+	if (variance) {
+	    variance[comp] = var;
+	}
+    }
+    return widest;
+}
+
+/* Seed the nv base vectors in vec from n texels of input (nc components each). Returns 0 when vec holds every distinct color exactly, non-zero when it is only an estimate. */
+static int
+fxt1_choose (float vec[][MAX_COMP], int nv,
+	     byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+#if 0 /* disabled alternative: pick evenly spaced texels */
+    /* Choose colors from a grid.
+     */
+    int i, j;
+
+    for (j = 0; j < nv; j++) {
+	int m = j * (n - 1) / (nv - 1);
+	for (i = 0; i < nc; i++) {
+	    vec[j][i] = input[m][i];
+	}
+    }
+#else
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 8x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    int i, j, k;
+#ifndef YUV
+    int minSum = 2000; /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1; /* small enough */
+    int minCol = 0; /* phoudoin: silent compiler! */
+    int maxCol = 0; /* phoudoin: silent compiler! */
+
+    struct {
+	int flag; /* non-zero once this slot is in use */
+	dword key; /* the texel's components packed 8 bits each */
+	int freq; /* texels sharing this key (counted, but not read in this function) */
+	int idx; /* index of the first texel that produced this key */
+    } hist[N_TEXELS];
+    int lenh = 0; /* number of histogram slots in use */
+
+    memset(hist, 0, sizeof(hist));
+    /* One pass: record each distinct color in hist and track the texels with the lowest and highest sum. */
+    for (k = 0; k < n; k++) {
+	int l;
+	dword key = 0;
+	int sum = 0;
+	for (i = 0; i < nc; i++) {
+	    key <<= 8;
+	    key |= input[k][i];
+#ifndef YUV
+	    sum += input[k][i];
+#else
+            /* RGB to YUV conversion according to CCIR 601 specs
+             * Y = 0.299R+0.587G+0.114B
+             * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
+             * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
+             */
+            sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+        }
+	for (l = 0; l < n; l++) {
+	    if (!hist[l].flag) {
+		/* alloc new slot */
+		hist[l].flag = !0;
+		hist[l].key = key;
+		hist[l].freq = 1;
+		hist[l].idx = k;
+		lenh = l + 1;
+		break;
+	    } else if (hist[l].key == key) {
+		hist[l].freq++;
+		break;
+	    }
+	}
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+    }
+    /* No more distinct colors than vectors: copy them verbatim and pad the rest with the first one. */
+    if (lenh <= nv) {
+	for (j = 0; j < lenh; j++) {
+	    for (i = 0; i < nc; i++) {
+		vec[j][i] = (float)input[hist[j].idx][i];
+	    }
+	}
+	for (; j < nv; j++) {
+	    for (i = 0; i < nc; i++) {
+		vec[j][i] = vec[0][i];
+	    }
+	}
+	return 0;
+    }
+    /* Otherwise spread the nv vectors linearly from the min-sum texel (j = 0) to the max-sum texel (j = nv - 1). */
+    for (j = 0; j < nv; j++) {
+	for (i = 0; i < nc; i++) {
+	    vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
+	}
+    }
+#endif
+
+    return !0;
+}
+
+/* Refine the nv base vectors in vec in place against n texels of input; returns non-zero once the error is acceptable, 0 after LL_N_REP passes without that. */
+static int
+fxt1_lloyd (float vec[][MAX_COMP], int nv,
+	    byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+    /* Use the generalized lloyd's algorithm for VQ:
+     *     find 4 color vectors.
+     *
+     *     for each sample color
+     *         sort to nearest vector.
+     *
+     *     replace each vector with the centroid of its matching colors.
+     *
+     *     repeat until RMS doesn't improve.
+     *
+     *     if a color vector has no samples, or becomes the same as another
+     *     vector, replace it with the color which is farthest from a sample.
+     *
+     * vec[][MAX_COMP]           initial vectors and resulting colors
+     * nv                        number of resulting colors required
+     * input[N_TEXELS][MAX_COMP] input texels
+     * nc                        number of components in input / vec
+     * n                         number of input samples
+     */
+
+    int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
+    int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
+    float error, lasterror = 1e9; /* summed squared distance of this pass / of the previous pass */
+
+    int i, j, k, rep;
+
+    /* the quantizer */
+    for (rep = 0; rep < LL_N_REP; rep++) {
+	/* reset sums & counters */
+	for (j = 0; j < nv; j++) {
+	    for (i = 0; i < nc; i++) {
+		sum[j][i] = 0;
+	    }
+	    cnt[j] = 0;
+	}
+	error = 0;
+
+	/* scan whole block */
+	for (k = 0; k < n; k++) {
+#if 1
+	    int best = -1;
+	    float err = 1e9; /* big enough */
+	    /* determine best vector */
+	    for (j = 0; j < nv; j++) {
+		float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
+			  (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
+			  (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
+		if (nc == 4) {
+		    e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
+		}
+		if (e < err) {
+		    err = e;
+		    best = j;
+		}
+	    }
+#else
+	    int best = fxt1_bestcol(vec, nv, input[k], nc, &err); /* disabled; fxt1_bestcol as defined in this file takes 4 arguments */
+#endif
+	    /* add in closest color */
+	    for (i = 0; i < nc; i++) {
+		sum[best][i] += input[k][i];
+	    }
+	    /* mark this vector as used */
+	    cnt[best]++;
+	    /* accumulate error */
+	    error += err;
+	}
+
+	/* check RMS: stop when the error is below LL_RMS_E, or it improved by less than LL_RMS_D */
+	if ((error < LL_RMS_E) ||
+	    ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
+	    return !0; /* good match */
+	}
+	lasterror = error;
+
+	/* move each vector to the barycenter of its closest colors */
+	for (j = 0; j < nv; j++) {
+	    if (cnt[j]) {
+		float div = 1.0F / cnt[j];
+		for (i = 0; i < nc; i++) {
+		    vec[j][i] = div * sum[j][i];
+		}
+	    } else {
+		/* this vec has no samples or is identical with a previous vec */
+		int worst = fxt1_worst(vec[j], input, nc, n); /* the texel farthest from this unused vector */
+		for (i = 0; i < nc; i++) {
+		    vec[j][i] = input[worst][i];
+		}
+	    }
+	}
+    }
+
+    return 0; /* could not converge fast enough */
+}
+
+/* Encode the 32 texels of input as a "chroma" block in cc[0..3]: four RGB base colors in the high qword, one 2-bit color index per texel in cc[0] and cc[1]. */
+static void
+fxt1_quantize_CHROMA (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 4; /* 4 base vectors to find */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    float vec[MAX_VECT][MAX_COMP];
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+    /* seed the base colors; run Lloyd's refinement only when the seed is not already exact */
+    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
+	fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
+    }
+    /* Q_MOV32 / Q_SHL / Q_OR32 are defined outside this view; presumably 64-bit set, shift-left and or -- TODO confirm */
+    Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
+    for (j = n_vect - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); /* 8-bit component reduced to 5 bits */
+	}
+    }
+    ((qword *)cc)[1] = hi;
+
+    lohi = lolo = 0;
+    /* right microtile */
+    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	lohi <<= 2;
+	lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
+    }
+    /* left microtile */
+    for (; k >= 0; k--) {
+	lolo <<= 2;
+	lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
+    }
+    cc[1] = lohi;
+    cc[0] = lolo;
+}
+
+/* Encode a block in alpha mode with lerp = 0: three RGBA base vectors fitted to the first n texels of reord, plus an all-zero fourth vector; every texel of input gets a 2-bit index. */
+static void
+fxt1_quantize_ALPHA0 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP],
+		      byte reord[N_TEXELS][MAX_COMP], int n)
+{
+    const int n_vect = 3; /* 3 base vectors to find */
+    const int n_comp = 4; /* 4 components: R, G, B, A */
+    float vec[MAX_VECT][MAX_COMP];
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    /* the last vector indicates zero */
+    for (i = 0; i < n_comp; i++) {
+	vec[n_vect][i] = 0;
+    }
+
+    /* the first n texels in reord are guaranteed to be non-zero */
+    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
+	fxt1_lloyd(vec, n_vect, reord, n_comp, n);
+    }
+    /* pack the mode bits, then the three alphas, then the three RGB triples, 5 bits per value */
+    Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
+    for (j = n_vect - 1; j >= 0; j--) {
+	/* add in alphas */
+	Q_SHL(hi, 5);
+	Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
+    }
+    for (j = n_vect - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp - 1; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+	}
+    }
+    ((qword *)cc)[1] = hi;
+
+    lohi = lolo = 0;
+    /* right microtile */
+    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	lohi <<= 2;
+	lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); /* n_vect + 1: the zero vector (index 3) is a candidate too */
+    }
+    /* left microtile */
+    for (; k >= 0; k--) {
+	lolo <<= 2;
+	lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
+    }
+    cc[1] = lohi;
+    cc[0] = lolo;
+}
+
+/* Encode a block in alpha mode with lerp = 1: three RGBA vectors (the middle one shared by both microtiles) and a 2-bit interpolation index per texel. */
+static void
+fxt1_quantize_ALPHA1 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 3; /* highest vector number in each microtile */
+    const int n_comp = 4; /* 4 components: R, G, B, A */
+    float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
+    float b, iv[MAX_COMP]; /* interpolation vector */
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    int minSum;
+    int maxSum;
+    int minColL = 0, maxColL = 0;
+    int minColR = 0, maxColR = 0;
+    int sumL = 0, sumR = 0; /* totals of the per-texel sums of each microtile; used as blend weights below */
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColL = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColL = k;
+	}
+	sumL += sum;
+    }
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColR = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColR = k;
+	}
+	sumR += sum;
+    }
+
+    /* choose the common vector (yuck!) */
+    {
+	int j1, j2;
+	int v1 = 0, v2 = 0;
+	float err = 1e9; /* big enough */
+	float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+	for (i = 0; i < n_comp; i++) {
+	    tv[0][i] = input[minColL][i];
+	    tv[1][i] = input[maxColL][i];
+	    tv[2][i] = input[minColR][i];
+	    tv[3][i] = input[maxColR][i];
+	}
+	for (j1 = 0; j1 < 2; j1++) { /* find the closest pair made of one left and one right extreme */
+	    for (j2 = 2; j2 < 4; j2++) {
+		float e = 0.0F;
+		for (i = 0; i < n_comp; i++) {
+		    e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
+		}
+		if (e < err) {
+		    err = e;
+		    v1 = j1;
+		    v2 = j2;
+		}
+	    }
+	}
+	for (i = 0; i < n_comp; i++) {
+	    vec[0][i] = tv[1 - v1][i]; /* the left extreme that was not paired */
+	    vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR); /* shared vector: weighted mix of the pair; NOTE(review): 0/0 if sumL + sumR == 0 */
+	    vec[2][i] = tv[5 - v2][i]; /* the right extreme that was not paired (5 - v2 maps 2 <-> 3) */
+	}
+    }
+    /* MAKEIVEC and CALCCDOT are defined outside this view; presumably they set up iv/b and derive a texel's index -- TODO confirm */
+    /* left microtile */
+    cc[0] = 0;
+    if (minColL != maxColL) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+	/* add in texels */
+	lolo = 0;
+	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lolo <<= 2;
+	    lolo |= texel;
+	}
+
+	cc[0] = lolo;
+    }
+
+    /* right microtile */
+    cc[1] = 0;
+    if (minColR != maxColR) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
+
+	/* add in texels */
+	lohi = 0;
+	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lohi <<= 2;
+	    lohi |= texel;
+	}
+
+	cc[1] = lohi;
+    }
+
+    Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
+    for (j = n_vect - 1; j >= 0; j--) {
+	/* add in alphas */
+	Q_SHL(hi, 5);
+	Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
+    }
+    for (j = n_vect - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp - 1; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+	}
+    }
+    ((qword *)cc)[1] = hi;
+}
+
+/* Encode a block in "hi" mode: cc[3] holds the max-sum and min-sum texels of reord[0..n-1] as two 5:5:5 colors, cc[0..2] a 3-bit index per texel of input (7 = transparent black). */
+static void
+fxt1_quantize_HI (dword *cc,
+		  byte input[N_TEXELS][MAX_COMP],
+		  byte reord[N_TEXELS][MAX_COMP], int n)
+{
+    const int n_vect = 6; /* highest vector number */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    float b = 0.0F;       /* phoudoin: silent compiler! */
+    float iv[MAX_COMP];   /* interpolation vector */
+    int i, k;
+    dword hihi; /* high quadword: hi dword */
+
+#ifndef YUV
+    int minSum = 2000; /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1; /* small enough */
+    int minCol = 0; /* phoudoin: silent compiler! */
+    int maxCol = 0; /* phoudoin: silent compiler! */
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 8x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < n; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += reord[k][i];
+	}
+#else
+        sum = 299 * reord[k][RCOMP] + 587 * reord[k][GCOMP] +  114 * reord[k][BCOMP]; /* must read reord: k, minCol and maxCol index reord, not input */
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+    }
+
+    hihi = 0; /* cc-hi = "00" */
+    for (i = 0; i < n_comp; i++) {
+	/* add in colors */
+	hihi <<= 5;
+	hihi |= reord[maxCol][i] >> 3;
+    }
+    for (i = 0; i < n_comp; i++) {
+	/* add in colors */
+	hihi <<= 5;
+	hihi |= reord[minCol][i] >> 3;
+    }
+    cc[3] = hihi;
+    cc[0] = cc[1] = cc[2] = 0;
+
+    /* compute interpolation vector */
+    if (minCol != maxCol) {
+	MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
+    }
+
+    /* add in texels */
+    for (k = N_TEXELS - 1; k >= 0; k--) {
+	int t = k * 3; /* bit offset of this texel's 3-bit index within the block */
+	dword *kk = (dword *)((byte *)cc + t / 8); /* dword view starting at the byte that contains bit t */
+	int texel = n_vect + 1; /* transparent black */
+
+	if (!ISTBLACK(input[k])) {
+	    if (minCol != maxCol) {
+		/* interpolate color */
+		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		/* add in texel */
+		kk[0] |= texel << (t & 7);
+	    }
+	} else {
+	    /* add in texel */
+	    kk[0] |= texel << (t & 7);
+	}
+    }
+}
+
+/* Encode a block in "mixed" mode with mode nibble 9: per microtile the min-sum and max-sum non-black texels are the two colors; 2-bit index 3 marks transparent black. */
+static void
+fxt1_quantize_MIXED1 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 2; /* highest vector number in each microtile */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+    float b, iv[MAX_COMP]; /* interpolation vector */
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    int minSum;
+    int maxSum;
+    int minColL = 0, maxColL = -1; /* maxColL stays -1 when the left microtile has no non-black texel */
+    int minColR = 0, maxColR = -1; /* likewise for the right microtile */
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	if (!ISTBLACK(input[k])) {
+	    int sum = 0;
+#ifndef YUV
+	    for (i = 0; i < n_comp; i++) {
+		sum += input[k][i];
+	    }
+#else
+            sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	    if (minSum > sum) {
+		minSum = sum;
+		minColL = k;
+	    }
+	    if (maxSum < sum) {
+		maxSum = sum;
+		maxColL = k;
+	    }
+	}
+    }
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	if (!ISTBLACK(input[k])) {
+	    int sum = 0;
+#ifndef YUV
+	    for (i = 0; i < n_comp; i++) {
+		sum += input[k][i];
+	    }
+#else
+            sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	    if (minSum > sum) {
+		minSum = sum;
+		minColR = k;
+	    }
+	    if (maxSum < sum) {
+		maxSum = sum;
+		maxColR = k;
+	    }
+	}
+    }
+
+    /* left microtile */
+    if (maxColL == -1) {
+	/* all transparent black */
+	cc[0] = ~0UL;
+	for (i = 0; i < n_comp; i++) {
+	    vec[0][i] = 0;
+	    vec[1][i] = 0;
+	}
+    } else {
+	cc[0] = 0; /* NOTE(review): if minColL == maxColL this 0 is final, so transparent-black texels here also get index 0 -- confirm intended */
+	for (i = 0; i < n_comp; i++) {
+	    vec[0][i] = input[minColL][i];
+	    vec[1][i] = input[maxColL][i];
+	}
+	if (minColL != maxColL) {
+	    /* compute interpolation vector */
+	    MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+	    /* add in texels */
+	    lolo = 0;
+	    for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+		int texel = n_vect + 1;	/* transparent black */
+		if (!ISTBLACK(input[k])) {
+		    /* interpolate color */
+		    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		}
+		/* add in texel */
+		lolo <<= 2;
+		lolo |= texel;
+	    }
+	    cc[0] = lolo;
+	}
+    }
+
+    /* right microtile */
+    if (maxColR == -1) {
+	/* all transparent black */
+	cc[1] = ~0UL;
+	for (i = 0; i < n_comp; i++) {
+	    vec[2][i] = 0;
+	    vec[3][i] = 0;
+	}
+    } else {
+	cc[1] = 0;
+	for (i = 0; i < n_comp; i++) {
+	    vec[2][i] = input[minColR][i];
+	    vec[3][i] = input[maxColR][i];
+	}
+	if (minColR != maxColR) {
+	    /* compute interpolation vector */
+	    MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
+
+	    /* add in texels */
+	    lohi = 0;
+	    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+		int texel = n_vect + 1;	/* transparent black */
+		if (!ISTBLACK(input[k])) {
+		    /* interpolate color */
+		    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		}
+		/* add in texel */
+		lohi <<= 2;
+		lohi |= texel;
+	    }
+	    cc[1] = lohi;
+	}
+    }
+    /* mode nibble: 9, plus bit 2 of vec[3]'s green in bit 2 and bit 2 of vec[1]'s green in bit 1 */
+    Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
+    for (j = 2 * 2 - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, vec[j][i] >> 3);
+	}
+    }
+    ((qword *)cc)[1] = hi;
+}
+
+/* Encode a block in "mixed" mode with mode nibble 8: per microtile the texels with the lowest and highest value in the highest-variance channel are the two colors; 2-bit index per texel. */
+static void
+fxt1_quantize_MIXED0 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 3; /* highest vector number in each microtile */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+    float b, iv[MAX_COMP]; /* interpolation vector */
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    int minColL = 0, maxColL = 0;
+    int minColR = 0, maxColR = 0;
+#if 0 /* disabled alternative: pick the extremes by component sum */
+    int minSum;
+    int maxSum;
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColL = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColL = k;
+	}
+    }
+    minSum = 2000; /* big enough */
+    maxSum = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColR = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColR = k;
+	}
+    }
+#else
+    int minVal;
+    int maxVal;
+    int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2); /* highest-variance channel, left half (texels 0..15) */
+    int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2); /* same for the right half (texels 16..31) */
+
+    /* Scan the channel with max variance for lo & hi
+     * and use those as the two representative colors.
+     */
+    minVal = 2000; /* big enough */
+    maxVal = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	int t = input[k][maxVarL];
+	if (minVal > t) {
+	    minVal = t;
+	    minColL = k;
+	}
+	if (maxVal < t) {
+	    maxVal = t;
+	    maxColL = k;
+	}
+    }
+    minVal = 2000; /* big enough */
+    maxVal = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	int t = input[k][maxVarR];
+	if (minVal > t) {
+	    minVal = t;
+	    minColR = k;
+	}
+	if (maxVal < t) {
+	    maxVal = t;
+	    maxColR = k;
+	}
+    }
+#endif
+
+    /* left microtile */
+    cc[0] = 0;
+    for (i = 0; i < n_comp; i++) {
+	vec[0][i] = input[minColL][i];
+	vec[1][i] = input[maxColL][i];
+    }
+    if (minColL != maxColL) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+	/* add in texels */
+	lolo = 0;
+	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lolo <<= 2;
+	    lolo |= texel;
+	}
+
+	/* funky encoding for LSB of green */
+	if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
+	    for (i = 0; i < n_comp; i++) {
+		vec[1][i] = input[minColL][i];
+		vec[0][i] = input[maxColL][i];
+	    }
+	    lolo = ~lolo; /* turns every 2-bit index i into 3 - i, matching the swapped end colors */
+	}
+
+	cc[0] = lolo;
+    }
+
+    /* right microtile */
+    cc[1] = 0;
+    for (i = 0; i < n_comp; i++) {
+	vec[2][i] = input[minColR][i];
+	vec[3][i] = input[maxColR][i];
+    }
+    if (minColR != maxColR) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
+
+	/* add in texels */
+	lohi = 0;
+	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lohi <<= 2;
+	    lohi |= texel;
+	}
+
+	/* funky encoding for LSB of green */
+	if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
+	    for (i = 0; i < n_comp; i++) {
+		vec[3][i] = input[minColR][i];
+		vec[2][i] = input[maxColR][i];
+	    }
+	    lohi = ~lohi; /* same index inversion as for the left microtile */
+	}
+
+	cc[1] = lohi;
+    }
+    /* mode nibble: 8, plus bit 2 of vec[3]'s green in bit 2 and bit 2 of vec[1]'s green in bit 1 */
+    Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
+    for (j = 2 * 2 - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, vec[j][i] >> 3);
+	}
+    }
+    ((qword *)cc)[1] = hi;
+}
+
+/* Gather one 8x4 block from the four row pointers in lines (each is advanced past the 8 texels read) and encode it into cc[0..3] with the quantizer matching its alpha content. */
+static void
+fxt1_quantize (dword *cc, const byte *lines[], int comps)
+{
+    int trualpha; /* set when some non-black texel has alpha below 255 - ALPHA_TS */
+    byte reord[N_TEXELS][MAX_COMP]; /* the texels that are not transparent black, packed to the front (filled only when comps == 4) */
+
+    byte input[N_TEXELS][MAX_COMP];
+#ifndef ARGB
+    int i;
+#endif
+    int k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 8 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+	for (; k < 8; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4 + 12][i] = *lines[l]++; /* + 12 places columns 4..7 of row l at texels 16 + 4 * l .. 19 + 4 * l */
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+          input[k + l * 4][2] = *lines[l]++;
+          input[k + l * 4][1] = *lines[l]++;
+          input[k + l * 4][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+	for (; k < 8; k++) {
+          input[k + l * 4 + 12][2] = *lines[l]++;
+          input[k + l * 4 + 12][1] = *lines[l]++;
+          input[k + l * 4 + 12][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* block layout:
+     * 00, 01, 02, 03, 08, 09, 0a, 0b
+     * 10, 11, 12, 13, 18, 19, 1a, 1b
+     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
+     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
+     */
+
+    /* [dBorca]
+     * stupidity flows forth from this
+     */
+    l = N_TEXELS; /* from here on l counts the texels that are not transparent black */
+    trualpha = 0;
+    if (comps == 4) {
+	/* skip all transparent black texels */
+	l = 0;
+	for (k = 0; k < N_TEXELS; k++) {
+	    /* test all components against 0 */
+	    if (!ISTBLACK(input[k])) {
+		/* texel is not transparent black */
+		COPY_4UBV(reord[l], input[k]);
+		if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
+		    /* non-opaque texel */
+		    trualpha = !0;
+		}
+		l++;
+	    }
+	}
+    }
+
+#if 0 /* disabled alternative mode selection (ALPHA0 / HI / CHROMA) */
+    if (trualpha) {
+	fxt1_quantize_ALPHA0(cc, input, reord, l);
+    } else if (l == 0) {
+	cc[0] = cc[1] = cc[2] = -1;
+	cc[3] = 0;
+    } else if (l < N_TEXELS) {
+	fxt1_quantize_HI(cc, input, reord, l);
+    } else {
+	fxt1_quantize_CHROMA(cc, input);
+    }
+    (void)fxt1_quantize_ALPHA1;
+    (void)fxt1_quantize_MIXED1;
+    (void)fxt1_quantize_MIXED0;
+#else
+    if (trualpha) {
+	fxt1_quantize_ALPHA1(cc, input);
+    } else if (l == 0) {
+	cc[0] = cc[1] = cc[2] = ~0UL; /* every texel is transparent black */
+	cc[3] = 0;
+    } else if (l < N_TEXELS) {
+	fxt1_quantize_MIXED1(cc, input);
+    } else {
+	fxt1_quantize_MIXED0(cc, input);
+    }
+    (void)fxt1_quantize_ALPHA0; /* reference the quantizers not called in this configuration */
+    (void)fxt1_quantize_HI;
+    (void)fxt1_quantize_CHROMA;
+#endif
+}
+
+/* Encode a width x height image (comps bytes per texel, srcRowStride bytes per source row) into
+ * 128-bit FXT1 blocks at dest; destRowStride is the byte pitch of one row of 8x4 blocks.
+ * Returns 0 on success, -1 if the padded copy needed for a non-8x4-multiple size cannot be allocated. */
+TAPI int TAPIENTRY
+fxt1_encode (int width, int height, int comps,
+	     const void *source, int srcRowStride,
+	     void *dest, int destRowStride)
+{
+    int x, y;
+    const byte *data;
+    dword *encoded = (dword *)dest;
+    void *newSource = NULL;
+
+    /* Replicate image if width is not M8 or height is not M4 */
+    if ((width & 7) | (height & 3)) {
+	int newWidth = (width + 7) & ~7;
+	int newHeight = (height + 3) & ~3;
+	newSource = malloc((size_t)comps * newWidth * newHeight * sizeof(byte)); /* one byte per component */
+	if (newSource == NULL) return -1; /* nothing has been written to dest yet */
+	_mesa_upscale_teximage2d(width, height, newWidth, newHeight,
+				 comps, (const byte *)source,
+				 srcRowStride, (byte *)newSource);
+	source = newSource;
+	width = newWidth;
+	height = newHeight;
+	srcRowStride = comps * newWidth;
+    }
+
+    data = (const byte *)source;
+    destRowStride = (destRowStride - width * 2) / 4; /* now: dwords to skip after each row of blocks (a row of blocks is width * 2 bytes) */
+    for (y = 0; y < height; y += 4) {
+	unsigned int offs = 0 + (y + 0) * srcRowStride;
+	for (x = 0; x < width; x += 8) {
+	    const byte *lines[4];
+	    lines[0] = &data[offs];
+	    lines[1] = lines[0] + srcRowStride;
+	    lines[2] = lines[1] + srcRowStride;
+	    lines[3] = lines[2] + srcRowStride;
+	    offs += 8 * comps;
+	    fxt1_quantize(encoded, lines, comps);
+	    /* 128 bits per 8x4 block */
+	    encoded += 4;
+	}
+	encoded += destRowStride;
+    }
+
+    free(newSource); /* free(NULL) is a no-op, so no guard is needed */
+    return 0;
+}
+
+
+/***************************************************************************\
+ * FXT1 decoder
+ *
+ * The decoder is based on GL_3DFX_texture_compression_FXT1
+ * specification and serves as a concept for the encoder.
+\***************************************************************************/
+
+
+/* lookup table for scaling 5 bit colors up to 8 bits */
+static const byte _rgb_scale_5[] = {
+    0,   8,   16,  25,  33,  41,  49,  58,
+    66,  74,  82,  90,  99,  107, 115, 123,
+    132, 140, 148, 156, 165, 173, 181, 189,
+    197, 206, 214, 222, 230, 239, 247, 255
+};
+
+/* lookup table for scaling 6 bit colors up to 8 bits */
+static const byte _rgb_scale_6[] = {
+    0,   4,   8,   12,  16,  20,  24,  28,
+    32,  36,  40,  45,  49,  53,  57,  61,
+    65,  69,  73,  77,  81,  85,  89,  93,
+    97,  101, 105, 109, 113, 117, 121, 125,
+    130, 134, 138, 142, 146, 150, 154, 158,
+    162, 166, 170, 174, 178, 182, 186, 190,
+    194, 198, 202, 206, 210, 215, 219, 223,
+    227, 231, 235, 239, 243, 247, 251, 255
+};
+
+/* Bit-field helpers for the decoder; cc is a compressed block viewed as an array of dwords. */
+#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31)) /* the dword holding bit `which`, shifted so that bit is bit 0; does not gather fields that cross a dword boundary */
+#define UP5(c) _rgb_scale_5[(c) & 31] /* low 5 bits of c expanded to 8 bits */
+#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] /* 6-bit value made of the low 5 bits of c plus b as its LSB, expanded to 8 bits */
+#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n) /* rounded blend t/n of the way from c0 to c1; the expansion is not wrapped in outer parentheses */
+#define ZERO_4UBV(v) *((dword *)(v)) = 0 /* clear the texel at v with one dword store; expands to a bare assignment */
+
+/* Decode texel t of a "hi" block at code into rgba. The texel's 3-bit index selects the first
+ * color (0), the second color (6), a LERP between the two (1..5) or an all-zero rgba (7). */
+static void
+fxt1_decode_1HI (const byte *code, int t, byte *rgba)
+{
+    const dword *bits = (const dword *)(code + (t * 3) / 8); /* 3 index bits per texel */
+    const int sel = (bits[0] >> ((t * 3) & 7)) & 7;
+    const dword *cc = (const dword *)(code + 12); /* the dword holding both 5:5:5 colors */
+    if (sel == 7) {
+	ZERO_4UBV(rgba);
+	return;
+    }
+    switch (sel) {
+    case 0:
+	rgba[BCOMP] = UP5(CC_SEL(cc, 0));
+	rgba[GCOMP] = UP5(CC_SEL(cc, 5));
+	rgba[RCOMP] = UP5(CC_SEL(cc, 10));
+	break;
+    case 6:
+	rgba[BCOMP] = UP5(CC_SEL(cc, 15));
+	rgba[GCOMP] = UP5(CC_SEL(cc, 20));
+	rgba[RCOMP] = UP5(CC_SEL(cc, 25));
+	break;
+    default:
+	rgba[BCOMP] = LERP(6, sel, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
+	rgba[GCOMP] = LERP(6, sel, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
+	rgba[RCOMP] = LERP(6, sel, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
+    }
+    rgba[ACOMP] = 255;
+}
+
+
+/* Decode texel t (0..31) of a CC_CHROMA block: a 2-bit selector picks one of four opaque 5:5:5 colors packed from byte 8 on. */
+static void fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
+{
+    const dword *cc;
+    dword kk;
+    /* selectors for texels 0..15 live in dword 0, those for texels 16..31 in dword 1 */
+    cc = (const dword *)code;
+    if (t & 16) {
+	cc++;
+	t &= 15;
+    }
+    t = (cc[0] >> (t * 2)) & 3;
+    /* color t starts 15 * t bits after byte 8; gather only the three bytes it can span, because a 4-byte load for color 3 would touch the byte after the 16-byte block */
+    t *= 15;
+    code += 8 + t / 8;
+    kk = ((dword)code[0] | ((dword)code[1] << 8) | ((dword)code[2] << 16)) >> (t & 7);
+    rgba[BCOMP] = UP5(kk);
+    rgba[GCOMP] = UP5(kk >> 5);
+    rgba[RCOMP] = UP5(kk >> 10);
+    rgba[ACOMP] = 255;
+}
+
+
+static void	/* Decode texel t (0..31) of a CC_MIXED block into rgba[0..3]. */
+fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
+{
+    const dword *cc;
+    int col[2][3];	/* the two 5-bit-per-channel endpoint colors for this half of the block, indexed by RCOMP/GCOMP/BCOMP */
+    int glsb, selb;	/* only bit 0 matters: UP6 masks its second argument with 1 */
+    /* texels 16..31 use the selectors in dword 1 with colors 2/3; texels 0..15 use dword 0 with colors 0/1 */
+    cc = (const dword *)code;
+    if (t & 16) {
+	t &= 15;
+	t = (cc[1] >> (t * 2)) & 3;	/* 2-bit selector of this texel */
+	/* col 2 */
+	col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;	/* bit 94 (= 11 * 8 + 6); read through an unaligned load because the field crosses from dword 2 into dword 3 */
+	col[0][GCOMP] = CC_SEL(cc, 99);
+	col[0][RCOMP] = CC_SEL(cc, 104);
+	/* col 3 */
+	col[1][BCOMP] = CC_SEL(cc, 109);
+	col[1][GCOMP] = CC_SEL(cc, 114);
+	col[1][RCOMP] = CC_SEL(cc, 119);
+	glsb = CC_SEL(cc, 126);	/* extra green lsb for this half */
+	selb = CC_SEL(cc, 33);	/* bit 33 is the high bit of texel 16's selector */
+    } else {
+	t = (cc[0] >> (t * 2)) & 3;	/* 2-bit selector of this texel */
+	/* col 0 */
+	col[0][BCOMP] = CC_SEL(cc, 64);
+	col[0][GCOMP] = CC_SEL(cc, 69);
+	col[0][RCOMP] = CC_SEL(cc, 74);
+	/* col 1 */
+	col[1][BCOMP] = CC_SEL(cc, 79);
+	col[1][GCOMP] = CC_SEL(cc, 84);
+	col[1][RCOMP] = CC_SEL(cc, 89);
+	glsb = CC_SEL(cc, 125);	/* extra green lsb for this half */
+	selb = CC_SEL(cc, 1);	/* bit 1 is the high bit of texel 0's selector */
+    }
+
+    if (CC_SEL(cc, 124) & 1) {
+	/* alpha[0] == 1 */
+	/* selector 0 = first color, 1 = average of both, 2 = second color, 3 = all components zero */
+	if (t == 3) {
+	    ZERO_4UBV(rgba);
+	} else {
+	    if (t == 0) {
+		rgba[BCOMP] = UP5(col[0][BCOMP]);
+		rgba[GCOMP] = UP5(col[0][GCOMP]);
+		rgba[RCOMP] = UP5(col[0][RCOMP]);
+	    } else if (t == 2) {
+		rgba[BCOMP] = UP5(col[1][BCOMP]);
+		rgba[GCOMP] = UP6(col[1][GCOMP], glsb);	/* second color carries a 6-bit green */
+		rgba[RCOMP] = UP5(col[1][RCOMP]);
+	    } else {
+		rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
+		rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
+		rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
+	    }
+	    rgba[ACOMP] = 255;
+	}
+    } else {
+	/* alpha[0] == 0 */
+	/* always opaque: selector 0 = first color, 3 = second color, 1 and 2 = blends at thirds; both greens are 6-bit */
+	if (t == 0) {
+	    rgba[BCOMP] = UP5(col[0][BCOMP]);
+	    rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);	/* first color's green lsb is glsb xor selb */
+	    rgba[RCOMP] = UP5(col[0][RCOMP]);
+	} else if (t == 3) {
+	    rgba[BCOMP] = UP5(col[1][BCOMP]);
+	    rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
+	    rgba[RCOMP] = UP5(col[1][RCOMP]);
+	} else {
+	    rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
+	    rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
+				     UP6(col[1][GCOMP], glsb));
+	    rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
+	}
+	rgba[ACOMP] = 255;
+    }
+}
+
+
+static void	/* Decode texel t (0..31) of a CC_ALPHA block into rgba[0..3]. */
+fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
+{
+    const dword *cc;
+    /* bit 124 chooses between a two-endpoint ramp (lerp == 1) and a three-color palette (lerp == 0) */
+    cc = (const dword *)code;
+    if (CC_SEL(cc, 124) & 1) {
+	/* lerp == 1 */
+	int col0[4];	/* first endpoint (5 bits per component); differs between the two halves of the block */
+	/* the second endpoint is read from bits 79/84/89/114 for both halves */
+	if (t & 16) {
+	    t &= 15;
+	    t = (cc[1] >> (t * 2)) & 3;	/* texels 16..31: selectors in dword 1 */
+	    /* col 2 */
+	    col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;	/* bit 94 (= 11 * 8 + 6); unaligned load because the field crosses from dword 2 into dword 3 */
+	    col0[GCOMP] = CC_SEL(cc, 99);
+	    col0[RCOMP] = CC_SEL(cc, 104);
+	    col0[ACOMP] = CC_SEL(cc, 119);
+	} else {
+	    t = (cc[0] >> (t * 2)) & 3;	/* texels 0..15: selectors in dword 0 */
+	    /* col 0 */
+	    col0[BCOMP] = CC_SEL(cc, 64);
+	    col0[GCOMP] = CC_SEL(cc, 69);
+	    col0[RCOMP] = CC_SEL(cc, 74);
+	    col0[ACOMP] = CC_SEL(cc, 109);
+	}
+	/* selector 0 = first endpoint, 3 = second endpoint, 1 and 2 = blends at thirds (alpha included) */
+	if (t == 0) {
+	    rgba[BCOMP] = UP5(col0[BCOMP]);
+	    rgba[GCOMP] = UP5(col0[GCOMP]);
+	    rgba[RCOMP] = UP5(col0[RCOMP]);
+	    rgba[ACOMP] = UP5(col0[ACOMP]);
+	} else if (t == 3) {
+	    rgba[BCOMP] = UP5(CC_SEL(cc, 79));
+	    rgba[GCOMP] = UP5(CC_SEL(cc, 84));
+	    rgba[RCOMP] = UP5(CC_SEL(cc, 89));
+	    rgba[ACOMP] = UP5(CC_SEL(cc, 114));
+	} else {
+	    rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
+	    rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
+	    rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
+	    rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
+	}
+    } else {
+	/* lerp == 0 */
+	/* selectors for texels 0..15 live in dword 0, those for texels 16..31 in dword 1 */
+	if (t & 16) {
+	    cc++;
+	    t &= 15;
+	}
+	t = (cc[0] >> (t * 2)) & 3;
+
+	if (t == 3) {
+	    ZERO_4UBV(rgba);	/* selector 3: all four components become 0 */
+	} else {
+	    dword kk;
+	    cc = (const dword *)code;	/* back to the start of the block; cc may have been advanced above */
+	    rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));	/* alpha t sits at bit 109 + 5 * t of the block (96 + 13) */
+	    t *= 15;	/* color t starts 15 * t bits after byte 8 */
+	    cc = (const dword *)(code + 8 + t / 8);	/* unaligned load; t <= 30 here, so it ends at byte 14 at the latest */
+	    kk = cc[0] >> (t & 7);
+	    rgba[BCOMP] = UP5(kk);
+	    rgba[GCOMP] = UP5(kk >> 5);
+	    rgba[RCOMP] = UP5(kk >> 10);
+	}
+    }
+}
+
+
+TAPI void TAPIENTRY	/* Decode the single texel at column i, row j of an FXT1 image into rgba[0..3]. */
+fxt1_decode_1 (const void *texture, int stride,
+	       int i, int j, byte *rgba)
+{
+    static void (*decode_1[]) (const byte *, int, byte *) = {	/* indexed by the top three bits of the block */
+	fxt1_decode_1HI,	/* cc-high   = "00?" */
+	fxt1_decode_1HI,	/* cc-high   = "00?" */
+	fxt1_decode_1CHROMA,	/* cc-chroma = "010" */
+	fxt1_decode_1ALPHA,	/* alpha     = "011" */
+	fxt1_decode_1MIXED,	/* mixed     = "1??" */
+	fxt1_decode_1MIXED,	/* mixed     = "1??" */
+	fxt1_decode_1MIXED,	/* mixed     = "1??" */
+	fxt1_decode_1MIXED	/* mixed     = "1??" */
+    };
+    /* each 16-byte block covers 8 x 4 texels; there are stride / 8 blocks per block row */
+    const byte *code = (const byte *)texture +
+			((j / 4) * (stride / 8) + (i / 8)) * 16;
+    int mode = CC_SEL(code, 125);	/* bits 125..127, i.e. 0..7 */
+    int t = i & 7;
+    /* texel number inside the block: left 4 x 4 half is 0..15, right half is 16..31, four texels per row in each half */
+    if (t & 4) {
+	t += 12;
+    }
+    t += (j & 3) * 4;
+
+    decode_1[mode](code, t, rgba);
+
+#if VERBOSE
+    {	/* statistics only: bump the externally defined counter for this block type */
+	extern int cc_chroma;
+	extern int cc_alpha;
+	extern int cc_high;
+	extern int cc_mixed;
+	static int *cctype[] = {	/* same order as decode_1[] */
+	    &cc_high,
+	    &cc_high,
+	    &cc_chroma,
+	    &cc_alpha,
+	    &cc_mixed,
+	    &cc_mixed,
+	    &cc_mixed,
+	    &cc_mixed
+	};
+	(*cctype[mode])++;
+    }
+#endif
+}
diff --git a/Source/GlideHQ/tc-1.1+/fxt1.h b/Source/GlideHQ/tc-1.1+/fxt1.h
new file mode 100644
index 000000000..c2919bbac
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/fxt1.h
@@ -0,0 +1,38 @@
+/*
+ * FXT1 codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef FXT1_H_included
+#define FXT1_H_included
+
+TAPI int TAPIENTRY
+fxt1_encode (int width, int height, int comps,
+             const void *source, int srcRowStride,
+             void *dest, int destRowStride);
+
+TAPI void TAPIENTRY	/* decodes the texel at column i, row j into rgba[0..3] */
+fxt1_decode_1 (const void *texture, int stride /* in pixels */,
+	       int i, int j, byte *rgba);
+
+#endif
diff --git a/Source/GlideHQ/tc-1.1+/internal.h b/Source/GlideHQ/tc-1.1+/internal.h
new file mode 100644
index 000000000..28ace4b45
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/internal.h
@@ -0,0 +1,137 @@
+/*
+ * Texture compression
+ * Version:  1.0
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+
+#ifndef INTERNAL_H_included
+#define INTERNAL_H_included
+
+/*****************************************************************************\
+ * DLL stuff
+\*****************************************************************************/
+
+#ifdef __WIN32__
+#define TAPI __declspec(dllexport)
+#define TAPIENTRY /*__stdcall*/
+#else
+#define TAPI
+#define TAPIENTRY
+#endif
+
+
+/*****************************************************************************\
+ * 64bit types on 32bit machine
+\*****************************************************************************/
+
+#if (defined(__GNUC__) && !defined(__cplusplus)) || defined(WIN32)
+
+typedef unsigned long long qword;
+
+#define Q_MOV32(a, b) a = b
+#define Q_OR32(a, b)  a |= b
+#define Q_SHL(a, c)   a <<= c
+
+#else  /* !__GNUC__ */
+
+typedef struct {
+    dword lo, hi;
+} qword;
+
+#define Q_MOV32(a, b) a.lo = b
+#define Q_OR32(a, b)  a.lo |= b
+#define Q_SHL(a, c)	/* a <<= c on the lo/hi pair, 0 <= c < 64; c == 0 is a no-op because a.lo >> 32 would be undefined */ \
+    do {						\
+	if ((c) >= 32) {				\
+	    a.hi = a.lo << ((c) - 32);			\
+	    a.lo = 0;					\
+	} else if ((c) > 0) {				\
+	    a.hi = (a.hi << (c)) | (a.lo >> (32 - (c)));\
+	    a.lo <<= (c);				\
+	}						\
+    } while (0)
+
+#endif /* !__GNUC__ */
+
+
+/*****************************************************************************\
+ * Config
+\*****************************************************************************/
+
+#define RCOMP 0
+#define GCOMP 1
+#define BCOMP 2
+#define ACOMP 3
+
+/*****************************************************************************\
+ * Metric
+\*****************************************************************************/
+
+#define F(i) (float)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
+#define SAFECDOT 1 /* for paranoids */
+
+#define MAKEIVEC(NV, NC, IV, B, V0, V1)	\
+    do {				\
+	/* compute interpolation vector IV and bias B for CALCCDOT; uses a loop index `i` declared by the caller */\
+	float d2 = 0.0F;		\
+	float rd2;			\
+					\
+	for (i = 0; i < NC; i++) {	\
+	    IV[i] = (V1[i] - V0[i]) * F(i);\
+	    d2 += IV[i] * IV[i];	/* squared length of the weighted V0 -> V1 vector */\
+	}				\
+	rd2 = (float)NV / d2;		/* NOTE(review): d2 is 0 when V0 equals V1 -- confirm callers rule that out */\
+	B = 0;				\
+	for (i = 0; i < NC; i++) {	\
+	    IV[i] *= F(i);		\
+	    B -= IV[i] * V0[i];		\
+	    IV[i] *= rd2;		\
+	}				\
+	B = B * rd2 + 0.5F;		/* the 0.5 makes the (int) cast in CALCCDOT round instead of truncate */\
+    } while (0)
+
+#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
+    do {				/* TEXEL = (int)(V . IV + B); uses a loop index `i` declared by the caller */\
+	float dot = 0.0F;		\
+	for (i = 0; i < NC; i++) {	\
+	    dot += V[i] * IV[i];	\
+	}				\
+	TEXEL = (int)(dot + B);		\
+	if (SAFECDOT) {			/* clamp the result to 0..NV */\
+	    if (TEXEL < 0) {		\
+		TEXEL = 0;		\
+	    } else if (TEXEL > NV) {	\
+		TEXEL = NV;		\
+	    }				\
+	}				\
+    } while (0)
+
+
+/*****************************************************************************\
+ * Utility functions
+\*****************************************************************************/
+
+void
+_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
+			  unsigned int outWidth, unsigned int outHeight,
+			  unsigned int comps,
+			  const byte *src, int srcRowStride,
+			  unsigned char *dest);
+
+#endif
diff --git a/Source/GlideHQ/tc-1.1+/texstore.c b/Source/GlideHQ/tc-1.1+/texstore.c
new file mode 100644
index 000000000..2eb0306fe
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/texstore.c
@@ -0,0 +1,93 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * _mesa_upscale_teximage2d speedup
+ */
+
+#include <assert.h>
+
+#include "types.h"
+#include "internal.h"
+
+
+/*
+ * Tile the inWidth x inHeight source image across an outWidth x outHeight
+ * destination.
+ *
+ * Destination texel (j, i) is a copy of source texel
+ * (j mod inWidth, i mod inHeight), so the source repeats as many times as
+ * needed in both directions, including when the destination is more than
+ * twice as large as the source (e.g. a 1-texel-wide image widened to 8).
+ *
+ *   inWidth, inHeight    source size in texels
+ *   outWidth, outHeight  destination size in texels; asserted to be no
+ *                        smaller than the source
+ *   comps                number of bytes per texel
+ *   src                  first source texel
+ *   srcRowStride         distance in bytes between source rows
+ *   dest                 receives outHeight rows of outWidth * comps bytes,
+ *                        stored back to back
+ *
+ * An empty source (inWidth or inHeight of zero) leaves dest untouched.
+ */
+void
+_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
+			  unsigned int outWidth, unsigned int outHeight,
+			  unsigned int comps,
+			  const byte *src, int srcRowStride,
+			  byte *dest)
+{
+    unsigned int i, j, k;
+    unsigned int ii, jj;	/* i and j wrapped into the source image */
+
+    assert(outWidth >= inWidth);
+    assert(outHeight >= inHeight);
+
+    if (inWidth == 0 || inHeight == 0) {
+	return;	/* nothing to replicate */
+    }
+
+    for (i = 0, ii = 0; i < outHeight; i++) {
+	const byte *srcRow = src + (int)ii * srcRowStride;
+	byte *destRow = dest + i * outWidth * comps;
+
+	for (j = 0, jj = 0; j < outWidth; j++) {
+	    const byte *in = srcRow + jj * comps;
+	    byte *out = destRow + j * comps;
+
+	    for (k = 0; k < comps; k++) {
+		out[k] = in[k];
+	    }
+	    /* step the wrapped column without a per-texel modulo */
+	    if (++jj == inWidth) {
+		jj = 0;
+	    }
+	}
+
+	if (++ii == inHeight) {
+	    ii = 0;
+	}
+    }
+}
diff --git a/Source/GlideHQ/tc-1.1+/types.h b/Source/GlideHQ/tc-1.1+/types.h
new file mode 100644
index 000000000..40e7153ab
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/types.h
@@ -0,0 +1,11 @@
+#ifndef TYPES_H_included
+#define TYPES_H_included
+
+/*****************************************************************************\
+ * 32bit types
+\*****************************************************************************/
+typedef unsigned char byte;	/*  8-bit */
+typedef unsigned short word;	/* 16-bit */
+typedef unsigned int dword;	/* 32-bit */
+
+#endif
diff --git a/Source/GlideHQ/tc-1.1+/wrapper.c b/Source/GlideHQ/tc-1.1+/wrapper.c
new file mode 100644
index 000000000..7bc8d610a
--- /dev/null
+++ b/Source/GlideHQ/tc-1.1+/wrapper.c
@@ -0,0 +1,110 @@
+/*
+ * Texture compression
+ * Version:  1.0
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+
+#include <assert.h>
+
+#include "types.h"
+#include "internal.h"
+#include "dxtn.h"
+
+
+#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT   0x83F0
+#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT  0x83F1
+#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT  0x83F2
+#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT  0x83F3
+
+
+TAPI void TAPIENTRY	/* exported wrapper: forwards to dxt1_rgb_decode_1 with the arguments reordered to (data, stride, i, j, texel) */
+fetch_2d_texel_rgb_dxt1 (int texImage_RowStride,
+			 const byte *texImage_Data,
+			 int i, int j,
+			 byte *texel)
+{
+    dxt1_rgb_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
+}
+
+
+TAPI void TAPIENTRY	/* exported wrapper: forwards to dxt1_rgba_decode_1 with the arguments reordered to (data, stride, i, j, texel) */
+fetch_2d_texel_rgba_dxt1 (int texImage_RowStride,
+			  const byte *texImage_Data,
+			  int i, int j,
+			  byte *texel)
+{
+    dxt1_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
+}
+
+
+TAPI void TAPIENTRY	/* exported wrapper: forwards to dxt3_rgba_decode_1 with the arguments reordered to (data, stride, i, j, texel) */
+fetch_2d_texel_rgba_dxt3 (int texImage_RowStride,
+			  const byte *texImage_Data,
+			  int i, int j,
+			  byte *texel)
+{
+    dxt3_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
+}
+
+
+TAPI void TAPIENTRY	/* exported wrapper: forwards to dxt5_rgba_decode_1 with the arguments reordered to (data, stride, i, j, texel) */
+fetch_2d_texel_rgba_dxt5 (int texImage_RowStride,
+			  const byte *texImage_Data,
+			  int i, int j,
+			  byte *texel)
+{
+    dxt5_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
+}
+
+
+TAPI void TAPIENTRY	/* Compress `source` into `dest` with the DXTn encoder selected by destformat. */
+tx_compress_dxtn (int srccomps, int width, int height,
+		  const byte *source, int destformat, byte *dest,
+		  int destRowStride)
+{
+    int srcRowStride = width * srccomps;	/* source rows are taken to be tightly packed */
+    int rv;	/* NOTE(review): assigned but never read -- the return at the bottom is commented out */
+
+    switch (destformat) {
+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+	    rv = dxt1_rgb_encode(width, height, srccomps,
+				 source, srcRowStride,
+				 dest, destRowStride);
+	    break;
+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+	    rv = dxt1_rgba_encode(width, height, srccomps,
+				  source, srcRowStride,
+				  dest, destRowStride);
+	    break;
+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+	    rv = dxt3_rgba_encode(width, height, srccomps,
+				  source, srcRowStride,
+				  dest, destRowStride);
+	    break;
+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+	    rv = dxt5_rgba_encode(width, height, srccomps,
+				  source, srcRowStride,
+				  dest, destRowStride);
+	    break;
+	default:
+	    assert(0);	/* unsupported destformat; when assert is compiled out (NDEBUG) nothing is encoded */
+    }
+
+    /*return rv;*/
+}