From 8ae5b052e77069d97fbcc088b3b36a02338becdd Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:10:23 -0400
Subject: [PATCH 1/9] line break consistency fix:  Mempak.H

---
 Source/Project64/N64 System/Mips/Mempak.H | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/Source/Project64/N64 System/Mips/Mempak.H b/Source/Project64/N64 System/Mips/Mempak.H
index 33c9d9e3b..68b4cae6b 100644
--- a/Source/Project64/N64 System/Mips/Mempak.H	
+++ b/Source/Project64/N64 System/Mips/Mempak.H	
@@ -9,13 +9,13 @@
 *                                                                           *
 ****************************************************************************/
 #pragma once
-
-class Mempak
-{
-public:
-	static void Close		 ( void );
-	static BYTE CalculateCrc ( BYTE * DataToCrc );
-	static void ReadFrom	 ( int Control, int Address, BYTE * Buffer );
-	static void WriteTo		 ( int Control, int Address, BYTE * Buffer );
-
-};
+
+class Mempak
+{
+public:
+	static void Close		 ( void );
+	static BYTE CalculateCrc ( BYTE * DataToCrc );
+	static void ReadFrom	 ( int Control, int Address, BYTE * Buffer );
+	static void WriteTo		 ( int Control, int Address, BYTE * Buffer );
+
+};

From 24d86ab32956cf1324a92db7f40fdb1b9a333b21 Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:12:49 -0400
Subject: [PATCH 2/9] line break consistency fix:  3dmathSIMD.asm.cpp (btw
 inline asm SUCKS)

---
 Source/Glide64/3dmathSIMD.asm.cpp | 116 +++++++++++++++---------------
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/Source/Glide64/3dmathSIMD.asm.cpp b/Source/Glide64/3dmathSIMD.asm.cpp
index 5a9e136b0..1a876511b 100644
--- a/Source/Glide64/3dmathSIMD.asm.cpp
+++ b/Source/Glide64/3dmathSIMD.asm.cpp
@@ -42,18 +42,18 @@ segment .text
 extern "C" void __declspec(naked) DetectSIMD(int func, int * iedx, int * iecx)
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 		mov       eax,[func]
 		cpuid
 		mov       eax,[iedx]
 		mov       [eax],edx
 		mov       eax,[iecx]
 		mov       [eax],ecx
-		leave
-		ret
-	}
-}
+		leave
+		ret
+	}
+}
 
 /****************************************************************
 ;
@@ -65,8 +65,8 @@ extern "C" void __declspec(naked) TransformVectorSSE(float *src, float *dst, flo
 {
 	__asm
 	{
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
 		mov       ecx,[src]
 		mov       eax,[dst]
@@ -91,17 +91,17 @@ extern "C" void __declspec(naked) TransformVectorSSE(float *src, float *dst, flo
 		addps     xmm0,xmm2     ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2] 0 mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
 		movss     [eax],xmm0    ; mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
 		movhps    [eax+4],xmm0  ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2]
-		leave
-		ret
-	}
-}
+		leave
+		ret
+	}
+}
 
 extern "C" void __declspec(naked) MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
 {
 	__asm
 	{
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
       mov       eax,[r]      
       mov       ecx,[m1]
@@ -204,17 +204,17 @@ extern "C" void __declspec(naked) MulMatricesSSE(float m1[4][4],float m2[4][4],f
       addps     xmm4,xmm7
       
       movaps    [eax+48],xmm4
-	  leave
-	  ret
-	}
-}
+	  leave
+	  ret
+	}
+}
 
 extern "C" void __declspec(naked) NormalizeVectorSSE (float *v)
 {
 	_asm
 	{
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
       mov edx, [v]
       movaps xmm0, [edx]      ; x y z 0
@@ -229,10 +229,10 @@ extern "C" void __declspec(naked) NormalizeVectorSSE (float *v)
       rsqrtps xmm0, xmm0      ; 1.0/sqrt(x*x+z*z+y*y)
       mulps  xmm2, xmm0       ; x/sqrt(x*x+z*z+y*y) y/sqrt(x*x+z*z+y*y) z/sqrt(x*x+z*z+y*y) 0
       movaps [edx], xmm2
-	  leave
-	  ret
-	}
-}
+	  leave
+	  ret
+	}
+}
 
 /*****************************************************************
 ;
@@ -243,8 +243,8 @@ extern "C" void __declspec(naked) NormalizeVectorSSE (float *v)
 float __declspec(naked) DotProductSSE3(register float *v1, register float *v2)
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
       mov eax,[v1]
       mov edx,[v2]
@@ -253,10 +253,10 @@ float __declspec(naked) DotProductSSE3(register float *v1, register float *v2)
       haddps xmm0, xmm0
       haddps xmm0, xmm0
 ;      movss eax, xmm0
-		leave
-		ret
-	}
-}
+		leave
+		ret
+	}
+}
 
 /****************************************************************
 ;
@@ -267,8 +267,8 @@ float __declspec(naked) DotProductSSE3(register float *v1, register float *v2)
 extern "C" void __declspec(naked) TransformVector3DNOW(float *src, float *dst, float mat[4][4])
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
     femms
       mov         ecx,[src]
@@ -296,16 +296,16 @@ extern "C" void __declspec(naked) TransformVector3DNOW(float *src, float *dst, f
       movq        [eax],mm0     ; mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2] mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
       movd        [eax+8],mm3   ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2]
       femms
-		  leave
-		  ret
-	}
-}
+		  leave
+		  ret
+	}
+}
 
 extern "C" void __declspec(naked) InverseTransformVector3DNOW(float *src, float *dst, float mat[4][4])
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
     femms
       mov         ecx,[src]
@@ -333,16 +333,16 @@ extern "C" void __declspec(naked) InverseTransformVector3DNOW(float *src, float
       movq        [eax],mm0     ; mat[1][0]*src[0]+mat[1][1]*src[1]+mat[1][2]*src[2] mat[0][0]*src[0]+mat[0][1]*src[1]+mat[0][2]*src[2]
       movd        [eax+8],mm2   ; mat[2][0]*src[0]+mat[2][1]*src[1]+mat[2][2]*src[2]
       femms                    
-		  leave
-		  ret
-	}
-}
+		  leave
+		  ret
+	}
+}
 
 extern "C" void  __declspec(naked) MulMatrices3DNOW(float m1[4][4],float m2[4][4],float r[4][4])
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 
     femms
       mov         ecx,[m1]
@@ -465,16 +465,16 @@ extern "C" void  __declspec(naked) MulMatrices3DNOW(float m1[4][4],float m2[4][4
       movq        [eax+48],mm7
       movq        [eax+56],mm3
       femms
-	  leave
-	  ret
-	}
-}
+	  leave
+	  ret
+	}
+}
 
 extern "C" float  __declspec(naked) DotProduct3DNOW(register float *v1, register float *v2)
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
       
       femms
       mov         edx,[v1]
@@ -489,15 +489,15 @@ extern "C" float  __declspec(naked) DotProduct3DNOW(register float *v1, register
       pfadd       mm0,mm1
       movd        eax,mm0
       femms
-	  leave
-	  ret
-	}
-}
+	  leave
+	  ret
+	}
+}
 
 extern "C" void __declspec(naked) NormalizeVector3DNOW(float *v)
 {
 	_asm {
-		push ebp
+		push ebp
 		mov ebp,esp      
       femms
       mov          edx,[v]
@@ -521,7 +521,7 @@ extern "C" void __declspec(naked) NormalizeVector3DNOW(float *v)
       movq         [edx],mm1
       movq         [edx+8],mm2
       femms
-	  leave
-	  ret
-	}
+	  leave
+	  ret
+	}
 }

From dbf981697e0e14482a298c721a857ee7a00763a6 Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:13:15 -0400
Subject: [PATCH 3/9] line break consistency fix:  Debugger.cpp

---
 Source/Glide64/Debugger.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Source/Glide64/Debugger.cpp b/Source/Glide64/Debugger.cpp
index c0ac22f38..46a9091e4 100644
--- a/Source/Glide64/Debugger.cpp
+++ b/Source/Glide64/Debugger.cpp
@@ -67,7 +67,7 @@ GLIDE64_DEBUGGER _debugger;
 #define COL_GRID    0xFFFFFF80
 
 int  grid = 0;
-static const char *tri_type[4] = { "TRIANGLE", "TEXRECT", "FILLRECT", "BACKGROUND" };
+static const char *tri_type[4] = { "TRIANGLE", "TEXRECT", "FILLRECT", "BACKGROUND" };
 
 //Platform-specific stuff
 #ifndef __WINDOWS__
@@ -212,7 +212,7 @@ void debug_cacheviewer ()
           { SX(x*64.0f), SY(512+64.0f*i), 1, 1,       0, 0, 0, 0, {0, 0, 0, 0} },
           { SX(x*64.0f+64.0f*cache[x+y*16].scale_x), SY(512+64.0f*i), 1, 1,    255*cache[x+y*16].scale_x, 0, 0, 0, {0, 0, 0, 0} },
           { SX(x*64.0f), SY(512+64.0f*i+64.0f*cache[x+y*16].scale_y), 1, 1,    0, 255*cache[x+y*16].scale_y, 0, 0, {0, 0, 0, 0} },
-          { SX(x*64.0f+64.0f*cache[x+y*16].scale_x), SY(512+64.0f*i+64.0f*cache[x+y*16].scale_y), 1, 1, 255*cache[x+y*16].scale_x, 255*cache[x+y*16].scale_y, 0, 0, {0, 0, 0, 0} }
+          { SX(x*64.0f+64.0f*cache[x+y*16].scale_x), SY(512+64.0f*i+64.0f*cache[x+y*16].scale_y), 1, 1, 255*cache[x+y*16].scale_x, 255*cache[x+y*16].scale_y, 0, 0, {0, 0, 0, 0} }
           };
       for
       (int i=0; i<4; i++)
@@ -364,7 +364,7 @@ void debug_capture ()
               { SX(704.0f), SY(221.0f), 1, 1, 0, 0,  0, 0, {0, 0, 0, 0} },
               { SX(704.0f+256.0f*scx), SY(221.0f), 1, 1, 255*scx, 0, 255*scx, 0, {0, 0, 0, 0} },
               { SX(704.0f), SY(221.0f+256.0f*scy), 1, 1, 0, 255*scy, 0, 255*scy, {0, 0, 0, 0} },
-              { SX(704.0f+256.0f*scx), SY(221.0f+256.0f*scy), 1, 1, 255*scx, 255*scy, 255*scx, 255*scy, {0, 0, 0, 0} }
+              { SX(704.0f+256.0f*scx), SY(221.0f+256.0f*scy), 1, 1, 255*scx, 255*scy, 255*scx, 255*scy, {0, 0, 0, 0} }
               };
       ConvertCoordsConvert (v, 4);
       VERTEX *varr[4] = { &v[0], &v[1], &v[2], &v[3] };
@@ -851,7 +851,7 @@ void debug_mouse ()
     { cx,       cy, 1, 1,   0,   0,   0, 0, {0, 0, 0, 0} },
     { cx+32,    cy, 1, 1, 255,   0,   0, 0, {0, 0, 0, 0} },
     { cx,    cy+32, 1, 1,   0, 255,   0, 0, {0, 0, 0, 0} },
-    { cx+32, cy+32, 1, 1, 255, 255,   0, 0, {0, 0, 0, 0} }
+    { cx+32, cy+32, 1, 1, 255, 255,   0, 0, {0, 0, 0, 0} }
     };
 
   ConvertCoordsKeep (v, 4);
@@ -996,7 +996,7 @@ void output (float x, float y, int scale, const char *fmt, ...)
     VERTEX v[4] = { { SX(x), SY(768-y), 1, 1,   (float)c, r+16.0f, 0, 0, {0, 0, 0, 0} },
       { SX(x+8), SY(768-y), 1, 1,   c+8.0f, r+16.0f, 0, 0, {0, 0, 0, 0} },
       { SX(x), SY(768-y-16), 1, 1,  (float)c, (float)r, 0, 0, {0, 0, 0, 0} },
-      { SX(x+8), SY(768-y-16), 1, 1,  c+8.0f, (float)r, 0, 0, {0, 0, 0, 0} }
+      { SX(x+8), SY(768-y-16), 1, 1,  c+8.0f, (float)r, 0, 0, {0, 0, 0, 0} }
       };
     if (!scale)
     {

From 3bf2ac641b23e95b0d8778fd833a39111b0e69ae Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:14:21 -0400
Subject: [PATCH 4/9] line break consistency fix:  FixedPoint.asm.cpp (MORE
 POINTLESS ASM!)

---
 Source/Glide64/FixedPoint.asm.cpp | 34 +++++++++++++++----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/Source/Glide64/FixedPoint.asm.cpp b/Source/Glide64/FixedPoint.asm.cpp
index 69284ee7d..24088f2a8 100644
--- a/Source/Glide64/FixedPoint.asm.cpp
+++ b/Source/Glide64/FixedPoint.asm.cpp
@@ -39,45 +39,45 @@
 extern "C" int __declspec(naked) imul16(int x, int y)
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 		mov   eax, [x]
 		mov   edx, [y]
 		imul  edx        
 		shrd  eax,edx,16
-		leave
-		ret
-	}
-}
+		leave
+		ret
+	}
+}
 
 //(x * y) >> 14
 extern "C" int  __declspec(naked) imul14(int x, int y)
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 		mov   eax, [x]
 		mov   edx, [y]
 		imul  edx        
 		shrd  eax,edx,14
-		leave
-		ret
-	}
-}
+		leave
+		ret
+	}
+}
 
 //(x << 16) / y
 extern "C" int __declspec(naked) idiv16(int x, int y)
 {
 	_asm {
-		push ebp
-		mov ebp,esp
+		push ebp
+		mov ebp,esp
 		mov   eax, [x]
 		mov   ebx, [y]
 		mov   edx,eax   
 		sar   edx,16
 		shl   eax,16    
 		idiv  ebx  
-		leave
-		ret
-	}
+		leave
+		ret
+	}
 }
\ No newline at end of file

From 42de60ca44e9bb23d94bedcee18c7bef92cc9a80 Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:15:42 -0400
Subject: [PATCH 5/9] line break consistency fix:  Util.cpp

---
 Source/Glide64/Util.cpp | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/Source/Glide64/Util.cpp b/Source/Glide64/Util.cpp
index 76d186db2..4a9923697 100644
--- a/Source/Glide64/Util.cpp
+++ b/Source/Glide64/Util.cpp
@@ -909,7 +909,7 @@ __inline double interp3p(float a, float b, float c, double r1, double r2)
 {
   return (a)+(((b)+((c)-(b))*(r2))-(a))*(r1);
 }
-/*
+/*
 #define interp3p(a, b, c, r1, r2) \
   (a+(((b)+((c)-(b))*(r2))-(a))*(r1))
 */
@@ -1925,32 +1925,32 @@ void update ()
 
       if (((rdp.flags & ZBUF_ENABLED) || rdp.zsrc == 1) && rdp.cycle_mode < 2)
       {
-        if (rdp.flags & ZBUF_COMPARE)
+        if (rdp.flags & ZBUF_COMPARE)
         {
           switch ((rdp.rm & 0xC00)>>10) {
-            case 0:
-              grDepthBiasLevel(0);
-              grDepthBufferFunction (settings.zmode_compare_less ? GR_CMP_LESS : GR_CMP_LEQUAL);
-              break;
-            case 1:
-              grDepthBiasLevel(-4);
-              grDepthBufferFunction (settings.zmode_compare_less ? GR_CMP_LESS : GR_CMP_LEQUAL);
-              break;
-            case 2:
+            case 0:
+              grDepthBiasLevel(0);
+              grDepthBufferFunction (settings.zmode_compare_less ? GR_CMP_LESS : GR_CMP_LEQUAL);
+              break;
+            case 1:
+              grDepthBiasLevel(-4);
+              grDepthBufferFunction (settings.zmode_compare_less ? GR_CMP_LESS : GR_CMP_LEQUAL);
+              break;
+            case 2:
               grDepthBiasLevel(settings.ucode == 7 ? -4 : 0);
               grDepthBufferFunction (GR_CMP_LESS);
               break;
             case 3:
-              // will be set dynamically per polygon
-              //grDepthBiasLevel(-deltaZ);
+              // will be set dynamically per polygon
+              //grDepthBiasLevel(-deltaZ);
               grDepthBufferFunction (GR_CMP_LEQUAL);
               break;
-          }
+          }
         }
-        else
+        else
         {
-          grDepthBiasLevel(0);
-          grDepthBufferFunction (GR_CMP_ALWAYS);
+          grDepthBiasLevel(0);
+          grDepthBufferFunction (GR_CMP_ALWAYS);
         }
 
         if (rdp.flags & ZBUF_UPDATE)
@@ -1960,7 +1960,7 @@ void update ()
       }
       else
       {
-        grDepthBiasLevel(0);
+        grDepthBiasLevel(0);
         grDepthBufferFunction (GR_CMP_ALWAYS);
         grDepthMask (FXFALSE);
       }

From 56dfb581ed3496d7e89da9c11b77a4f06169c8f8 Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:16:16 -0400
Subject: [PATCH 6/9] line break consistency fix:  rdp.h

---
 Source/Glide64/rdp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Glide64/rdp.h b/Source/Glide64/rdp.h
index d48a3358f..fff889af6 100644
--- a/Source/Glide64/rdp.h
+++ b/Source/Glide64/rdp.h
@@ -598,9 +598,9 @@ struct RDP_Base{
   wxUint32 fill_color;
   wxUint32 prim_color;
   wxUint32 blend_color;
-  wxUint32 env_color;
+  wxUint32 env_color;
   wxUint32 SCALE;
-  wxUint32 CENTER;
+  wxUint32 CENTER;
   wxUint32 prim_lodmin, prim_lodfrac;
   wxUint16 prim_depth;
   wxUint16 prim_dz;

From 63c4ad0182014c9b5a0dfd4999b5553cd568ebe3 Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:16:40 -0400
Subject: [PATCH 7/9] line break consistency fix:  ucode00.h

---
 Source/Glide64/ucode00.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Glide64/ucode00.h b/Source/Glide64/ucode00.h
index 52f71b401..e7f8a099b 100644
--- a/Source/Glide64/ucode00.h
+++ b/Source/Glide64/ucode00.h
@@ -879,7 +879,7 @@ static void uc0_setothermode_h()
 
   if (mask & 0x00300000)  // cycle type
   {
-    rdp.cycle_mode = (wxUint8)((rdp.othermode_h & 0x00300000) >> 20);
+    rdp.cycle_mode = (wxUint8)((rdp.othermode_h & 0x00300000) >> 20);
     rdp.update |= UPDATE_ZBUF_ENABLED;
     FRDP ("cycletype: %d\n", rdp.cycle_mode);
   }
@@ -942,7 +942,7 @@ static void uc0_setothermode_l()
   {
     rdp.zsrc = (rdp.othermode_l & 0x00000004) >> 2;
     FRDP ("z-src sel: %s\n", str_zs[rdp.zsrc]);
-    FRDP ("z-src sel: %08lx\n", rdp.zsrc);
+    FRDP ("z-src sel: %08lx\n", rdp.zsrc);
     rdp.update |= UPDATE_ZBUF_ENABLED;
   }
 

From 822de92789d84353cd33805a6910a8b53aa424e7 Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:17:16 -0400
Subject: [PATCH 8/9] line break consistency fix:  ucode06.h

---
 Source/Glide64/ucode06.h | 532 +++++++++++++++++++--------------------
 1 file changed, 266 insertions(+), 266 deletions(-)

diff --git a/Source/Glide64/ucode06.h b/Source/Glide64/ucode06.h
index ad0dbe56a..29c5b7c3c 100644
--- a/Source/Glide64/ucode06.h
+++ b/Source/Glide64/ucode06.h
@@ -39,82 +39,82 @@
 
 // STANDARD DRAWIMAGE - draws a 2d image based on the following structure
 
-static float set_sprite_combine_mode ()
-{
-  if (rdp.cycle_mode == 2)
-  {
-    rdp.tex = 1;
-    rdp.allow_combine = 0;
-    // Now actually combine !
+static float set_sprite_combine_mode ()
+{
+  if (rdp.cycle_mode == 2)
+  {
+    rdp.tex = 1;
+    rdp.allow_combine = 0;
+    // Now actually combine !
     GrCombineFunction_t color_source = GR_COMBINE_FUNCTION_LOCAL;
     if (rdp.tbuff_tex && rdp.tbuff_tex->info.format == GR_TEXFMT_ALPHA_INTENSITY_88)
 		color_source = GR_COMBINE_FUNCTION_LOCAL_ALPHA;
-    cmb.tmu1_func = cmb.tmu0_func = color_source;
-    cmb.tmu1_fac = cmb.tmu0_fac = GR_COMBINE_FACTOR_NONE;
-    cmb.tmu1_a_func = cmb.tmu0_a_func = GR_COMBINE_FUNCTION_LOCAL;
-    cmb.tmu1_a_fac = cmb.tmu0_a_fac = GR_COMBINE_FACTOR_NONE;
-    cmb.tmu1_invert = cmb.tmu0_invert = FXFALSE;
-    cmb.tmu1_a_invert = cmb.tmu0_a_invert = FXFALSE;
-  }
-
-  rdp.update |= UPDATE_COMBINE;
-  update ();
-
-  rdp.allow_combine = 1;
-
-  // set z buffer mode
-  float Z = 0.0f;
-  if ((rdp.othermode_l & 0x00000030) && rdp.cycle_mode < 2)
-  {
-    wxUint16 prim_dz = 0;
-    if (rdp.zsrc == 1)
-    {
-      Z = rdp.prim_depth;
-      prim_dz = rdp.prim_dz;
-    }
-    FRDP ("prim_depth = %d, prim_dz = %d\n", rdp.prim_depth, rdp.prim_dz);
-    Z = ScaleZ(Z);
-
-    if (rdp.othermode_l & 0x00000400)
-      grDepthBiasLevel(rdp.prim_dz);
-  }
-  else
-  {
-    LRDP("z compare not used, using 0\n");
-  }
-
-  grCullMode (GR_CULL_DISABLE);
-  grFogMode (GR_FOG_DISABLE);
-  rdp.update |= UPDATE_CULL_MODE | UPDATE_FOG_ENABLED;
-
-  if (rdp.cycle_mode == 2)
-  {
-    grColorCombine (GR_COMBINE_FUNCTION_SCALE_OTHER,
-      GR_COMBINE_FACTOR_ONE,
-      GR_COMBINE_LOCAL_NONE,
-      GR_COMBINE_OTHER_TEXTURE,
-      FXFALSE);
-    grAlphaCombine (GR_COMBINE_FUNCTION_SCALE_OTHER,
-      GR_COMBINE_FACTOR_ONE,
-      GR_COMBINE_LOCAL_NONE,
-      GR_COMBINE_OTHER_TEXTURE,
-      FXFALSE);
-    grAlphaBlendFunction (GR_BLEND_ONE,
-      GR_BLEND_ZERO,
-      GR_BLEND_ZERO,
-      GR_BLEND_ZERO);
-    if (rdp.othermode_l & 1)
-    {
-      grAlphaTestFunction (GR_CMP_GEQUAL);
-      grAlphaTestReferenceValue (0x80);
-    }
-    else
-      grAlphaTestFunction (GR_CMP_ALWAYS);
-    rdp.update |= UPDATE_ALPHA_COMPARE | UPDATE_COMBINE;
-  }
-  return Z;
-}
-
+    cmb.tmu1_func = cmb.tmu0_func = color_source;
+    cmb.tmu1_fac = cmb.tmu0_fac = GR_COMBINE_FACTOR_NONE;
+    cmb.tmu1_a_func = cmb.tmu0_a_func = GR_COMBINE_FUNCTION_LOCAL;
+    cmb.tmu1_a_fac = cmb.tmu0_a_fac = GR_COMBINE_FACTOR_NONE;
+    cmb.tmu1_invert = cmb.tmu0_invert = FXFALSE;
+    cmb.tmu1_a_invert = cmb.tmu0_a_invert = FXFALSE;
+  }
+
+  rdp.update |= UPDATE_COMBINE;
+  update ();
+
+  rdp.allow_combine = 1;
+
+  // set z buffer mode
+  float Z = 0.0f;
+  if ((rdp.othermode_l & 0x00000030) && rdp.cycle_mode < 2)
+  {
+    wxUint16 prim_dz = 0;
+    if (rdp.zsrc == 1)
+    {
+      Z = rdp.prim_depth;
+      prim_dz = rdp.prim_dz;
+    }
+    FRDP ("prim_depth = %d, prim_dz = %d\n", rdp.prim_depth, rdp.prim_dz);
+    Z = ScaleZ(Z);
+
+    if (rdp.othermode_l & 0x00000400)
+      grDepthBiasLevel(rdp.prim_dz);
+  }
+  else
+  {
+    LRDP("z compare not used, using 0\n");
+  }
+
+  grCullMode (GR_CULL_DISABLE);
+  grFogMode (GR_FOG_DISABLE);
+  rdp.update |= UPDATE_CULL_MODE | UPDATE_FOG_ENABLED;
+
+  if (rdp.cycle_mode == 2)
+  {
+    grColorCombine (GR_COMBINE_FUNCTION_SCALE_OTHER,
+      GR_COMBINE_FACTOR_ONE,
+      GR_COMBINE_LOCAL_NONE,
+      GR_COMBINE_OTHER_TEXTURE,
+      FXFALSE);
+    grAlphaCombine (GR_COMBINE_FUNCTION_SCALE_OTHER,
+      GR_COMBINE_FACTOR_ONE,
+      GR_COMBINE_LOCAL_NONE,
+      GR_COMBINE_OTHER_TEXTURE,
+      FXFALSE);
+    grAlphaBlendFunction (GR_BLEND_ONE,
+      GR_BLEND_ZERO,
+      GR_BLEND_ZERO,
+      GR_BLEND_ZERO);
+    if (rdp.othermode_l & 1)
+    {
+      grAlphaTestFunction (GR_CMP_GEQUAL);
+      grAlphaTestReferenceValue (0x80);
+    }
+    else
+      grAlphaTestFunction (GR_CMP_ALWAYS);
+    rdp.update |= UPDATE_ALPHA_COMPARE | UPDATE_COMBINE;
+  }
+  return Z;
+}
+
 void uc6_sprite2d ();
 
 typedef struct DRAWIMAGE_t {
@@ -135,23 +135,23 @@ typedef struct DRAWIMAGE_t {
   float scaleX;
   float scaleY;
 } DRAWIMAGE;
-
-typedef struct DRAWOBJECT_t {
-  float objX;
-  float objY;
-  float scaleW;
-  float scaleH;
-  short imageW;
-  short imageH;
-
-  wxUint16  imageStride;
-  wxUint16  imageAdrs;
-  wxUint8  imageFmt;
-  wxUint8  imageSiz;
-  wxUint8  imagePal;
-  wxUint8  imageFlags;
-} DRAWOBJECT;
-
+
+typedef struct DRAWOBJECT_t {
+  float objX;
+  float objY;
+  float scaleW;
+  float scaleH;
+  short imageW;
+  short imageH;
+
+  wxUint16  imageStride;
+  wxUint16  imageAdrs;
+  wxUint8  imageFmt;
+  wxUint8  imageSiz;
+  wxUint8  imagePal;
+  wxUint8  imageFlags;
+} DRAWOBJECT;
+
 void DrawHiresDepthImage (const DRAWIMAGE & d)
 {
   wxUint16 * src = (wxUint16*)(gfx.RDRAM+d.imagePtr);
@@ -293,56 +293,56 @@ void DrawImage (DRAWIMAGE & d)
   if (d.imageW == 0 || d.imageH == 0 || d.frameH == 0)   return;
 
   int x_size, y_size, x_shift, y_shift, line;
-  // choose optimum size for the format/size
-  switch (d.imageSiz)
-  {
+  // choose optimum size for the format/size
+  switch (d.imageSiz)
+  {
   case 0:
     if (rdp.tlut_mode < 2)
     {
       y_size = 64;
       y_shift = 6;
-    }
-    else
-    {
+    }
+    else
+    {
       y_size = 32;
       y_shift = 5;
-    }
+    }
     x_size = 128;
     x_shift = 7;
-    line = 8;
-    break;
+    line = 8;
+    break;
   case 1:
     if (rdp.tlut_mode < 2)
     {
       y_size = 64;
       y_shift = 6;
-    }
-    else
-    {
+    }
+    else
+    {
       y_size = 32;
       y_shift = 5;
-    }
+    }
     x_size = 64;
     x_shift = 6;
-    line = 8;
-    break;
+    line = 8;
+    break;
   case 2:
     x_size = 64;
     y_size = 32;
     x_shift = 6;
     y_shift = 5;
-    line = 16;
-    break;
+    line = 16;
+    break;
   case 3:
     x_size = 32;
     y_size = 16;
     x_shift = 4;
     y_shift = 3;
-    line = 16;
-    break;
-  default:
-    FRDP("DrawImage. unknown image size: %d\n", d.imageSiz);
-    return;
+    line = 16;
+    break;
+  default:
+    FRDP("DrawImage. unknown image size: %d\n", d.imageSiz);
+    return;
   }
 
   if (rdp.ci_width == 512 && !no_dlist) //RE2
@@ -443,8 +443,8 @@ void DrawImage (DRAWIMAGE & d)
   rdp.tiles[0].lr_t = y_size-1;
 
   const float Z = set_sprite_combine_mode ();
-  if (rdp.cycle_mode == 2)
-    rdp.allow_combine = 0;
+  if (rdp.cycle_mode == 2)
+    rdp.allow_combine = 0;
 
   if (fullscreen)
   {
@@ -453,7 +453,7 @@ void DrawImage (DRAWIMAGE & d)
     else if (d.scaleX == 1.0f && d.scaleY == 1.0f)
       grClipWindow (rdp.scissor.ul_x, rdp.scissor.ul_y, rdp.scissor.lr_x, rdp.scissor.lr_y);
     else
-      grClipWindow (rdp.scissor.ul_x, rdp.scissor.ul_y, min(rdp.scissor.lr_x, (wxUint32)((d.frameX+d.imageW/d.scaleX+0.5f)*rdp.scale_x)), min(rdp.scissor.lr_y, (wxUint32)((d.frameY+d.imageH/d.scaleY+0.5f)*rdp.scale_y)));
+      grClipWindow (rdp.scissor.ul_x, rdp.scissor.ul_y, min(rdp.scissor.lr_x, (wxUint32)((d.frameX+d.imageW/d.scaleX+0.5f)*rdp.scale_x)), min(rdp.scissor.lr_y, (wxUint32)((d.frameY+d.imageH/d.scaleY+0.5f)*rdp.scale_y)));
     rdp.update |=  UPDATE_SCISSOR;
   }
 
@@ -715,89 +715,89 @@ struct MAT2D {
   float BaseScaleX;
   float BaseScaleY;
 } mat_2d = {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f};
-
-static void uc6_read_background_data (DRAWIMAGE & d, bool bReadScale)
-{
-  wxUint32 addr = segoffset(rdp.cmd1) >> 1;
-
-  d.imageX      = (((wxUint16 *)gfx.RDRAM)[(addr+0)^1] >> 5);   // 0
-  d.imageW      = (((wxUint16 *)gfx.RDRAM)[(addr+1)^1] >> 2);   // 1
-  d.frameX      = ((short*)gfx.RDRAM)[(addr+2)^1] / 4.0f;       // 2
-  d.frameW      = ((wxUint16 *)gfx.RDRAM)[(addr+3)^1] >> 2;             // 3
-
-  d.imageY      = (((wxUint16 *)gfx.RDRAM)[(addr+4)^1] >> 5);   // 4
-  d.imageH      = (((wxUint16 *)gfx.RDRAM)[(addr+5)^1] >> 2);   // 5
-  d.frameY      = ((short*)gfx.RDRAM)[(addr+6)^1] / 4.0f;       // 6
-  d.frameH      = ((wxUint16 *)gfx.RDRAM)[(addr+7)^1] >> 2;             // 7
-
-  d.imagePtr    = segoffset(((wxUint32*)gfx.RDRAM)[(addr+8)>>1]);       // 8,9
-  d.imageFmt    = ((wxUint8 *)gfx.RDRAM)[(((addr+11)<<1)+0)^3]; // 11
-  d.imageSiz    = ((wxUint8 *)gfx.RDRAM)[(((addr+11)<<1)+1)^3]; // |
-  d.imagePal    = ((wxUint16 *)gfx.RDRAM)[(addr+12)^1]; // 12
-  wxUint16 imageFlip = ((wxUint16 *)gfx.RDRAM)[(addr+13)^1];    // 13;
-  d.flipX       = (wxUint8)imageFlip&0x01;
-
-  if (bReadScale)
-  {
-    d.scaleX      = ((short *)gfx.RDRAM)[(addr+14)^1] / 1024.0f;  // 14
-    d.scaleY      = ((short *)gfx.RDRAM)[(addr+15)^1] / 1024.0f;  // 15
-  }
-  else
-    d.scaleX = d.scaleY = 1.0f;
-
-  d.flipY       = 0;
-  int imageYorig= ((int *)gfx.RDRAM)[(addr+16)>>1] >> 5;
-  rdp.last_bg = d.imagePtr;
-
-  FRDP ("imagePtr: %08lx\n", d.imagePtr);
-  FRDP ("frameX: %f, frameW: %d, frameY: %f, frameH: %d\n", d.frameX, d.frameW, d.frameY, d.frameH);
-  FRDP ("imageX: %d, imageW: %d, imageY: %d, imageH: %d\n", d.imageX, d.imageW, d.imageY, d.imageH);
-  FRDP ("imageYorig: %d, scaleX: %f, scaleY: %f\n", imageYorig, d.scaleX, d.scaleY);
-  FRDP ("imageFmt: %d, imageSiz: %d, imagePal: %d, imageFlip: %d\n", d.imageFmt, d.imageSiz, d.imagePal, d.flipX);
-}
-
-static void uc6_bg (bool bg_1cyc)
-{
-  static const char *strFuncNames[] = {"uc6:bg_1cyc", "uc6:bg_copy"};
-  const char *strFuncName =  bg_1cyc ? strFuncNames[0] : strFuncNames[1];
-  if (rdp.skip_drawing)
-  {
-    FRDP("%s skipped\n", strFuncName);
-    return;
-  }
-  FRDP ("%s #%d, #%d\n", strFuncName, rdp.tri_n, rdp.tri_n+1);
-
-  DRAWIMAGE d;
-  uc6_read_background_data(d, bg_1cyc);
-
-  if (fb_hwfbe_enabled && FindTextureBuffer(d.imagePtr, d.imageW))
-  {
-    DrawHiresImage(d);
-    return;
-  }
-
-  if (settings.ucode == ucode_F3DEX2 || (settings.hacks&hack_PPL))
-  {
-    if ( (d.imagePtr != rdp.cimg) && (d.imagePtr != rdp.ocimg) && d.imagePtr) //can't draw from framebuffer
-      DrawImage (d);
-    else
-    {
-      FRDP("%s skipped\n", strFuncName);
-    }
-  }
-  else
-  {
-    DrawImage (d);
-  }
-}
+
+static void uc6_read_background_data (DRAWIMAGE & d, bool bReadScale)
+{
+  wxUint32 addr = segoffset(rdp.cmd1) >> 1;
+
+  d.imageX      = (((wxUint16 *)gfx.RDRAM)[(addr+0)^1] >> 5);   // 0
+  d.imageW      = (((wxUint16 *)gfx.RDRAM)[(addr+1)^1] >> 2);   // 1
+  d.frameX      = ((short*)gfx.RDRAM)[(addr+2)^1] / 4.0f;       // 2
+  d.frameW      = ((wxUint16 *)gfx.RDRAM)[(addr+3)^1] >> 2;             // 3
+
+  d.imageY      = (((wxUint16 *)gfx.RDRAM)[(addr+4)^1] >> 5);   // 4
+  d.imageH      = (((wxUint16 *)gfx.RDRAM)[(addr+5)^1] >> 2);   // 5
+  d.frameY      = ((short*)gfx.RDRAM)[(addr+6)^1] / 4.0f;       // 6
+  d.frameH      = ((wxUint16 *)gfx.RDRAM)[(addr+7)^1] >> 2;             // 7
+
+  d.imagePtr    = segoffset(((wxUint32*)gfx.RDRAM)[(addr+8)>>1]);       // 8,9
+  d.imageFmt    = ((wxUint8 *)gfx.RDRAM)[(((addr+11)<<1)+0)^3]; // 11
+  d.imageSiz    = ((wxUint8 *)gfx.RDRAM)[(((addr+11)<<1)+1)^3]; // |
+  d.imagePal    = ((wxUint16 *)gfx.RDRAM)[(addr+12)^1]; // 12
+  wxUint16 imageFlip = ((wxUint16 *)gfx.RDRAM)[(addr+13)^1];    // 13;
+  d.flipX       = (wxUint8)imageFlip&0x01;
+
+  if (bReadScale)
+  {
+    d.scaleX      = ((short *)gfx.RDRAM)[(addr+14)^1] / 1024.0f;  // 14
+    d.scaleY      = ((short *)gfx.RDRAM)[(addr+15)^1] / 1024.0f;  // 15
+  }
+  else
+    d.scaleX = d.scaleY = 1.0f;
+
+  d.flipY       = 0;
+  int imageYorig= ((int *)gfx.RDRAM)[(addr+16)>>1] >> 5;
+  rdp.last_bg = d.imagePtr;
+
+  FRDP ("imagePtr: %08lx\n", d.imagePtr);
+  FRDP ("frameX: %f, frameW: %d, frameY: %f, frameH: %d\n", d.frameX, d.frameW, d.frameY, d.frameH);
+  FRDP ("imageX: %d, imageW: %d, imageY: %d, imageH: %d\n", d.imageX, d.imageW, d.imageY, d.imageH);
+  FRDP ("imageYorig: %d, scaleX: %f, scaleY: %f\n", imageYorig, d.scaleX, d.scaleY);
+  FRDP ("imageFmt: %d, imageSiz: %d, imagePal: %d, imageFlip: %d\n", d.imageFmt, d.imageSiz, d.imagePal, d.flipX);
+}
+
+static void uc6_bg (bool bg_1cyc)
+{
+  static const char *strFuncNames[] = {"uc6:bg_1cyc", "uc6:bg_copy"};
+  const char *strFuncName =  bg_1cyc ? strFuncNames[0] : strFuncNames[1];
+  if (rdp.skip_drawing)
+  {
+    FRDP("%s skipped\n", strFuncName);
+    return;
+  }
+  FRDP ("%s #%d, #%d\n", strFuncName, rdp.tri_n, rdp.tri_n+1);
+
+  DRAWIMAGE d;
+  uc6_read_background_data(d, bg_1cyc);
+
+  if (fb_hwfbe_enabled && FindTextureBuffer(d.imagePtr, d.imageW))
+  {
+    DrawHiresImage(d);
+    return;
+  }
+
+  if (settings.ucode == ucode_F3DEX2 || (settings.hacks&hack_PPL))
+  {
+    if ( (d.imagePtr != rdp.cimg) && (d.imagePtr != rdp.ocimg) && d.imagePtr) //can't draw from framebuffer
+      DrawImage (d);
+    else
+    {
+      FRDP("%s skipped\n", strFuncName);
+    }
+  }
+  else
+  {
+    DrawImage (d);
+  }
+}
 
 static void uc6_bg_1cyc ()
-{
+{
   uc6_bg(true);
 }
-
+
 static void uc6_bg_copy ()
-{
+{
   uc6_bg(false);
 }
 
@@ -997,67 +997,67 @@ static void uc6_draw_polygons (VERTEX v[4])
   }
 }
 
-static void uc6_read_object_data (DRAWOBJECT & d)
-{
-  wxUint32 addr = segoffset(rdp.cmd1) >> 1;
-
-  d.objX            = ((short*)gfx.RDRAM)[(addr+0)^1] / 4.0f;               // 0
-  d.scaleW  = ((wxUint16 *)gfx.RDRAM)[(addr+1)^1] / 1024.0f;        // 1
-  d.imageW  = ((short*)gfx.RDRAM)[(addr+2)^1] >> 5;                 // 2, 3 is padding
-  d.objY            = ((short*)gfx.RDRAM)[(addr+4)^1] / 4.0f;               // 4
-  d.scaleH  = ((wxUint16 *)gfx.RDRAM)[(addr+5)^1] / 1024.0f;        // 5
-  d.imageH  = ((short*)gfx.RDRAM)[(addr+6)^1] >> 5;                 // 6, 7 is padding
-
-  d.imageStride = ((wxUint16 *)gfx.RDRAM)[(addr+8)^1];                  // 8
-  d.imageAdrs           = ((wxUint16 *)gfx.RDRAM)[(addr+9)^1];                  // 9
-  d.imageFmt             = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+0)^3]; // 10
-  d.imageSiz             = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+1)^3]; // |
-  d.imagePal             = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+2)^3]; // 11
-  d.imageFlags   = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+3)^3]; // |
-
-  if (d.imageW < 0)
-    d.imageW = (short)rdp.scissor_o.lr_x - (short)d.objX - d.imageW;
-  if (d.imageH < 0)
-    d.imageH = (short)rdp.scissor_o.lr_y - (short)d.objY - d.imageH;
-
-  FRDP ("#%d, #%d\n"
-    "objX: %f, scaleW: %f, imageW: %d\n"
-    "objY: %f, scaleH: %f, imageH: %d\n"
-    "size: %d, format: %d\n", rdp.tri_n, rdp.tri_n+1,
-    d.objX, d.scaleW, d.imageW, d.objY, d.scaleH, d.imageH, d.imageSiz, d.imageFmt);
-}
-
-static void uc6_init_tile(const DRAWOBJECT & d)
-{
-  // SetTile ()
-  TILE *tile = &rdp.tiles[0];
-  tile->format = d.imageFmt;      // RGBA
-  tile->size = d.imageSiz;                // 16-bit
-  tile->line = d.imageStride;
-  tile->t_mem = d.imageAdrs;
-  tile->palette = d.imagePal;
-  tile->clamp_t = 1;
-  tile->mirror_t = 0;
-  tile->mask_t = 0;
-  tile->shift_t = 0;
-  tile->clamp_s = 1;
-  tile->mirror_s = 0;
-  tile->mask_s = 0;
-  tile->shift_s = 0;
-
-  // SetTileSize ()
-  rdp.tiles[0].ul_s = 0;
-  rdp.tiles[0].ul_t = 0;
-  rdp.tiles[0].lr_s = (d.imageW>0)?d.imageW-1:0;
-  rdp.tiles[0].lr_t = (d.imageH>0)?d.imageH-1:0;
-}
-
+static void uc6_read_object_data (DRAWOBJECT & d) // fills d from the record at segoffset(rdp.cmd1) in gfx.RDRAM, then logs it
+{
+  wxUint32 addr = segoffset(rdp.cmd1) >> 1; // halved: used below as an index into RDRAM viewed as 16-bit values
+
+  d.objX            = ((short*)gfx.RDRAM)[(addr+0)^1] / 4.0f;               // 0  (numbers = 16-bit slot relative to addr; ^1 flips the low index bit)
+  d.scaleW  = ((wxUint16 *)gfx.RDRAM)[(addr+1)^1] / 1024.0f;        // 1
+  d.imageW  = ((short*)gfx.RDRAM)[(addr+2)^1] >> 5;                 // 2, 3 is padding
+  d.objY            = ((short*)gfx.RDRAM)[(addr+4)^1] / 4.0f;               // 4
+  d.scaleH  = ((wxUint16 *)gfx.RDRAM)[(addr+5)^1] / 1024.0f;        // 5
+  d.imageH  = ((short*)gfx.RDRAM)[(addr+6)^1] >> 5;                 // 6, 7 is padding
+
+  d.imageStride = ((wxUint16 *)gfx.RDRAM)[(addr+8)^1];                  // 8
+  d.imageAdrs           = ((wxUint16 *)gfx.RDRAM)[(addr+9)^1];                  // 9
+  d.imageFmt             = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+0)^3]; // 10 (byte reads: index is doubled, then ^3)
+  d.imageSiz             = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+1)^3]; // | second byte of slot 10
+  d.imagePal             = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+2)^3]; // 11
+  d.imageFlags   = ((wxUint8 *)gfx.RDRAM)[(((addr+10)<<1)+3)^3]; // | second byte of slot 11
+
+  if (d.imageW < 0) // negative width: recompute from the scissor's lr_x, objX and the negative value itself
+    d.imageW = (short)rdp.scissor_o.lr_x - (short)d.objX - d.imageW;
+  if (d.imageH < 0) // same treatment for a negative height, using lr_y and objY
+    d.imageH = (short)rdp.scissor_o.lr_y - (short)d.objY - d.imageH;
+
+  FRDP ("#%d, #%d\n"
+    "objX: %f, scaleW: %f, imageW: %d\n"
+    "objY: %f, scaleH: %f, imageH: %d\n"
+    "size: %d, format: %d\n", rdp.tri_n, rdp.tri_n+1,
+    d.objX, d.scaleW, d.imageW, d.objY, d.scaleH, d.imageH, d.imageSiz, d.imageFmt);
+}
+
+static void uc6_init_tile(const DRAWOBJECT & d) // configures rdp.tiles[0] from the object description d
+{
+  // SetTile ()
+  TILE *tile = &rdp.tiles[0];
+  tile->format = d.imageFmt;      // taken from d; not necessarily RGBA
+  tile->size = d.imageSiz;                // taken from d; not necessarily 16-bit
+  tile->line = d.imageStride;
+  tile->t_mem = d.imageAdrs;
+  tile->palette = d.imagePal;
+  tile->clamp_t = 1; // the remaining fields are constants: clamp on, mirror/mask/shift zero, for t and s alike
+  tile->mirror_t = 0;
+  tile->mask_t = 0;
+  tile->shift_t = 0;
+  tile->clamp_s = 1;
+  tile->mirror_s = 0;
+  tile->mask_s = 0;
+  tile->shift_s = 0;
+
+  // SetTileSize ()
+  rdp.tiles[0].ul_s = 0; // rdp.tiles[0] is the same element `tile` points to
+  rdp.tiles[0].ul_t = 0;
+  rdp.tiles[0].lr_s = (d.imageW>0)?d.imageW-1:0; // imageW-1, or 0 when imageW <= 0
+  rdp.tiles[0].lr_t = (d.imageH>0)?d.imageH-1:0; // imageH-1, or 0 when imageH <= 0
+}
+
 static void uc6_obj_rectangle ()
 {
-  LRDP ("uc6:obj_rectangle ");
-  DRAWOBJECT d;
-  uc6_read_object_data(d);
-
+  LRDP ("uc6:obj_rectangle ");
+  DRAWOBJECT d;
+  uc6_read_object_data(d);
+
   if (d.imageAdrs > 4096)
   {
     FRDP("tmem: %08lx is out of bounds! return\n", d.imageAdrs);
@@ -1068,8 +1068,8 @@ static void uc6_obj_rectangle ()
     LRDP("Texture was not loaded! return\n");
     return;
   }
-
-  uc6_init_tile(d);
+
+  uc6_init_tile(d);
 
   float Z = set_sprite_combine_mode ();
 
@@ -1123,10 +1123,10 @@ static void uc6_obj_rectangle ()
 
 static void uc6_obj_sprite ()
 {
-  LRDP ("uc6:obj_sprite ");
-  DRAWOBJECT d;
-  uc6_read_object_data(d);
-  uc6_init_tile(d);
+  LRDP ("uc6:obj_sprite ");
+  DRAWOBJECT d;
+  uc6_read_object_data(d);
+  uc6_init_tile(d);
 
   float Z = set_sprite_combine_mode ();
 
@@ -1286,9 +1286,9 @@ static void uc6_DrawYUVImageToFrameBuffer(wxUint16 ul_x, wxUint16 ul_y, wxUint16
 
 static void uc6_obj_rectangle_r ()
 {
-  LRDP ("uc6:obj_rectangle_r ");
-  DRAWOBJECT d;
-  uc6_read_object_data(d);
+  LRDP ("uc6:obj_rectangle_r ");
+  DRAWOBJECT d;
+  uc6_read_object_data(d);
 
   if (d.imageFmt == 1 && (settings.hacks&hack_Ogre64)) //Ogre Battle needs to copy YUV texture to frame buffer
   {
@@ -1300,8 +1300,8 @@ static void uc6_obj_rectangle_r ()
     rdp.tri_n += 2;
     return;
   }
-
-  uc6_init_tile(d);
+
+  uc6_init_tile(d);
 
   float Z = set_sprite_combine_mode ();
 
@@ -1554,9 +1554,9 @@ void uc6_sprite2d ()
       return;
     }
 
-    const wxUint32 texsize = (d.imageW * d.imageH) << d.imageSiz >> 1;
-    const wxUint32 maxTexSize = rdp.tlut_mode < 2 ? 4096 : 2048;
-
+    const wxUint32 texsize = (d.imageW * d.imageH) << d.imageSiz >> 1;
+    const wxUint32 maxTexSize = rdp.tlut_mode < 2 ? 4096 : 2048;
+
     if (texsize > maxTexSize)
     {
       if (d.scaleX != 1)

From a155a74f48829d8966085eb2f435acdd440218aa Mon Sep 17 00:00:00 2001
From: unknown <rj150@hotmail.com>
Date: Wed, 11 Mar 2015 17:17:43 -0400
Subject: [PATCH 9/9] line break consistency fix:  TxUtil.cpp

---
 Source/GlideHQ/TxUtil.cpp | 2010 ++++++++++++++++++-------------------
 1 file changed, 1005 insertions(+), 1005 deletions(-)

diff --git a/Source/GlideHQ/TxUtil.cpp b/Source/GlideHQ/TxUtil.cpp
index 4e72cd8da..850adb6fb 100644
--- a/Source/GlideHQ/TxUtil.cpp
+++ b/Source/GlideHQ/TxUtil.cpp
@@ -1,1006 +1,1006 @@
-/*
- * Texture Filtering
- * Version:  1.0
- *
- * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
- * Email koolsmoky(at)users.sourceforge.net
- * Web   http://www.3dfxzone.it/koolsmoky
- *
- * this is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * this is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING.  If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "TxUtil.h"
-#include "TxDbg.h"
-#include <zlib/zlib.h>
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "TxUtil.h"
+#include "TxDbg.h"
+#include <zlib/zlib.h>
 #include <malloc.h>
-
-/*
- * External libraries
- ******************************************************************************/
-TxLoadLib::TxLoadLib()
-{
-#ifdef DXTN_DLL
-  if (!_dxtnlib)
-    _dxtnlib = LoadLibrary("dxtn");
-
-  if (_dxtnlib) {
-    if (!_tx_compress_dxtn)
-      _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");
-
-    if (!_tx_compress_fxt1)
-      _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
-  }
-#else
-  _tx_compress_dxtn = tx_compress_dxtn;
-  _tx_compress_fxt1 = fxt1_encode;
-
-#endif
-}
-
-TxLoadLib::~TxLoadLib()
-{
-#ifdef DXTN_DLL
-  /* free dynamic library */
-  if (_dxtnlib)
-    FreeLibrary(_dxtnlib);
-#endif
-
-}
-
-fxtCompressTexFuncExt
-TxLoadLib::getfxtCompressTexFuncExt()
-{
-  return _tx_compress_fxt1;
-}
-
-dxtCompressTexFuncExt
-TxLoadLib::getdxtCompressTexFuncExt()
-{
-  return _tx_compress_dxtn;
-}
-
-
-/*
- * Utilities
- ******************************************************************************/
-uint32
-TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
-{
-  int dataSize = sizeofTx(width, height, format);
-
-  /* for now we use adler32 if something else is better
-   * we can simply swtich later
-   */
-  /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */
-
-  /* zlib crc32 */
-  return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);
-}
-
-int
-TxUtil::sizeofTx(int width, int height, uint16 format)
-{
-  int dataSize = 0;
-
-  /* a lookup table for the shifts would be better */
-  switch (format) {
-  case GR_TEXFMT_ARGB_CMP_FXT1:
-    dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
-    break;
-  case GR_TEXFMT_ARGB_CMP_DXT1:
-    dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
-    break;
-  case GR_TEXFMT_ARGB_CMP_DXT3:
-  case GR_TEXFMT_ARGB_CMP_DXT5:
-    dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
-    break;
-  case GR_TEXFMT_ALPHA_INTENSITY_44:
-  case GR_TEXFMT_ALPHA_8:
-  case GR_TEXFMT_INTENSITY_8:
-  case GR_TEXFMT_P_8:
-    dataSize = width * height;
-    break;
-  case GR_TEXFMT_ARGB_4444:
-  case GR_TEXFMT_ARGB_1555:
-  case GR_TEXFMT_RGB_565:
-  case GR_TEXFMT_ALPHA_INTENSITY_88:
-    dataSize = (width * height) << 1;
-    break;
-  case GR_TEXFMT_ARGB_8888:
-    dataSize = (width * height) << 2;
-    break;
-  default:
-    /* unsupported format */
-    DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
-    ;
-  }
-
-  return dataSize;
-}
-
-#if 0 /* unused */
-uint32
-TxUtil::chkAlpha(uint32* src, int width, int height)
-{
-  /* NOTE: _src must be ARGB8888
-   * return values
-   * 0x00000000: 8bit alpha
-   * 0x00000001: 1bit alpha
-   * 0xff000001: no alpha
-   */
-
-  int _size = width * height;
-  uint32 alpha = 0;
-
-  __asm {
-    mov esi, dword ptr [src];
-    mov ecx, dword ptr [_size];
-    mov ebx, 0xff000000;
-
-  tc1_loop:
-    mov eax, dword ptr [esi];
-    add esi, 4;
-
-    and eax, 0xff000000;
-    jz  alpha1bit;
-    cmp eax, 0xff000000;
-    je  alpha1bit;
-    jmp done;
-
-  alpha1bit:
-    and ebx, eax;
-    dec ecx;
-    jnz tc1_loop;
-
-    or  ebx, 0x00000001;
-    mov dword ptr [alpha], ebx;
-
-  done:
-  }
-
-  return alpha;
-}
-#endif
-
-uint32
-TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
-{
-  /* Rice CRC32 for now. We can switch this to Jabo MD5 or
-   * any other custom checksum.
-   * TODO: use *_HIRESTEXTURE option. */
-
-  if (!src) return 0;
-
-  return RiceCRC32(src, width, height, size, rowStride);
-}
-
-uint64
-TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
-{
-  /* Rice CRC32 for now. We can switch this to Jabo MD5 or
-   * any other custom checksum.
-   * TODO: use *_HIRESTEXTURE option. */
-  /* Returned value is 64bits: hi=palette crc32 low=texture crc32 */
-
-  if (!src) return 0;
-
-  uint64 crc64Ret = 0;
-
-  if (palette) {
-    uint32 crc32 = 0, cimax = 0;
-    switch (size & 0xff) {
-    case 1:
-      if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) {
-        crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512);
-        crc64Ret <<= 32;
-        crc64Ret |= (uint64)crc32;
-      }
-      break;
-    case 0:
-      if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) {
-        crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32);
-        crc64Ret <<= 32;
-        crc64Ret |= (uint64)crc32;
-      }
-    }
-  }
-  if (!crc64Ret) {
-    crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride);
-  }
-
-  return crc64Ret;
-}
-
-/*
-** Computes Adler32 checksum for a stream of data.
-**
-** From the specification found in RFC 1950: (ZLIB Compressed Data Format
-** Specification version 3.3)
-**
-** ADLER32 (Adler-32 checksum) This contains a checksum value of the
-** uncompressed data (excluding any dictionary data) computed according to
-** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
-** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
-**
-** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
-** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
-** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
-** as s2*65536 + s1 in most-significant-byte first (network) order.
-**
-** 8.2. The Adler-32 algorithm 
-**
-** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
-** provides an extremely low probability of undetected errors.
-**
-** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
-** so the modulo operation time is negligible. If the bytes are a, b, c,
-** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
-** unlike the first sum, which is just a checksum. That 65521 is prime is
-** important to avoid a possible large class of two-byte errors that leave
-** the check unchanged. (The Fletcher checksum uses 255, which is not prime
-** and which also makes the Fletcher check insensitive to single byte
-** changes 0 <-> 255.)
-**
-** The sum s1 is initialized to 1 instead of zero to make the length of
-** the sequence part of s2, so that the length does not have to be checked
-** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
-*/
-
-uint32
-TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
-{
-#if 1
-  /* zlib adler32 */
-  return adler32(dwAdler32, data, Len);
-#else
-  register uint32 s1 = dwAdler32 & 0xFFFF;
-  register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF;
-  int k;
-
-  while (Len > 0) {
-    /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-    k = (Len < 5552 ? Len : 5552);
-    Len -= k;
-    while (k--) {
-      s1 += *data++;
-      s2 += s1;
-    }
-    /* 65521 is the largest prime smaller than 65536 */
-    s1 %= 65521;
-    s2 %= 65521;
-  }
-
-  return (s2 << 16) | s1;
-#endif
-}
-
-uint32
-TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
-{
-  int i;
-  uint32 ret = 1;
-  uint32 width_in_bytes = width * size;
-
-  for (i = 0; i < height; i++) {
-    ret = Adler32(src, width_in_bytes, ret);
-    src += rowStride;
-  }
-
-  return ret;
-}
-
-/* Rice CRC32 for hires texture packs */
-/* NOTE: The following is used in Glide64 to calculate the CRC32
- * for Rice hires texture packs.
- *
- * BYTE* addr = (BYTE*)(gfx.RDRAM +
- *                     rdp.addr[rdp.tiles[tile].t_mem] +
- *                     (rdp.tiles[tile].ul_t * bpl) +
- *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
- * RiceCRC32(addr,
- *          rdp.tiles[tile].width,
- *          rdp.tiles[tile].height,
- *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
- *          bpl);
- */
-uint32
-TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
-{
-  /* NOTE: bytes_per_width must be equal or larger than 4 */
-
-  uint32 crc32Ret = 0;
-  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
-
-  /*if (bytes_per_width < 4) return 0;*/
-
-  try {
-#ifdef WIN32
-    __asm {
-      push ebx;
-      push esi;
-      push edi;
-
-      mov ecx, dword ptr [src];
-      mov eax, dword ptr [height];
-      mov edx, 0;
-      dec eax;
-
-    loop2:
-      mov ebx, dword ptr [bytes_per_width];
-      sub ebx, 4;
-
-    loop1:
-      mov esi, dword ptr [ecx+ebx];
-      xor esi, ebx;
-      rol edx, 4;
-      add edx, esi;
-      sub ebx, 4;
-      jge loop1;
-
-      xor esi, eax;
-      add edx, esi;
-      add ecx, dword ptr [rowStride];
-      dec eax;
-      jge loop2;
-
-      mov dword ptr [crc32Ret], edx;
-
-      pop edi;
-      pop esi;
-      pop ebx;
-    }
-#else
-    asm volatile(
-      "pushl %%ebx \n"
-      "pushl %%esi \n"
-      "pushl %%edi \n"
-
-      "movl %0, %%ecx \n"
-      "movl %1, %%eax \n"
-      "movl $0, %%edx \n"
-      "decl %%eax \n"
-
-      "0: \n"
-      "movl %2, %%ebx \n"
-      "subl $4, %%ebx \n"
-
-      "1: \n"
-      "movl (%%ecx,%%ebx), %%esi \n"
-      "xorl %%ebx, %%esi \n"
-      "roll $4, %%edx \n"
-      "addl %%esi, %%edx \n"
-      "subl $4, %%ebx \n"
-      "jge  1b \n"
-
-      "xorl %%eax, %%esi \n"
-      "addl %%esi, %%edx \n"
-      "addl %3, %%ecx \n"
-      "decl %%eax \n"
-      "jge  0b \n"
-
-      "movl %%edx, %4 \n"
-
-      "popl %%edi \n"
-      "popl %%esi \n"
-      "popl %%ebx \n"
-      :
-      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret)
-      : "memory", "cc"
-      );
-#endif
-  } catch(...) {
-    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
-  }
-
-  return crc32Ret;
-}
-
-boolean
-TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
-                        uint32* crc32, uint32* cimax)
-{
-  /* NOTE: bytes_per_width must be equal or larger than 4 */
-
-  uint32 crc32Ret = 0;
-  uint32 cimaxRet = 0;
-  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
-
-  /*if (bytes_per_width < 4) return 0;*/
-
-  /* 4bit CI */
-  try {
-#ifdef WIN32
-    __asm {
-      push ebx;
-      push esi;
-      push edi;
-
-      mov ecx, dword ptr [src];
-      mov eax, dword ptr [height];
-      mov edx, 0;
-      mov edi, 0;
-      dec eax;
-
-    loop2:
-      mov ebx, dword ptr [bytes_per_width];
-      sub ebx, 4;
-
-    loop1:
-      mov esi, dword ptr [ecx+ebx];
-
-      cmp edi, 0x0000000f;
-      je findmax0;
-
-      push ecx;
-      mov ecx, esi;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax8;
-      mov edi, ecx;
-
-    findmax8:
-      mov ecx, esi;
-      shr ecx, 4;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax7;
-      mov edi, ecx;
-
-    findmax7:
-      mov ecx, esi;
-      shr ecx, 8;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax6;
-      mov edi, ecx;
-
-    findmax6:
-      mov ecx, esi;
-      shr ecx, 12;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax5;
-      mov edi, ecx;
-
-    findmax5:
-      mov ecx, esi;
-      shr ecx, 16;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax4;
-      mov edi, ecx;
-
-    findmax4:
-      mov ecx, esi;
-      shr ecx, 20;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax3;
-      mov edi, ecx;
-
-    findmax3:
-      mov ecx, esi;
-      shr ecx, 24;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax2;
-      mov edi, ecx;
-
-    findmax2:
-      mov ecx, esi;
-      shr ecx, 28;
-      and ecx, 0x0000000f;
-      cmp ecx, edi;
-      jb  findmax1;
-      mov edi, ecx;
-
-    findmax1:
-      pop ecx;
-
-    findmax0:
-      xor esi, ebx;
-      rol edx, 4;
-      add edx, esi;
-      sub ebx, 4;
-      jge loop1;
-
-      xor esi, eax;
-      add edx, esi;
-      add ecx, dword ptr [rowStride];
-      dec eax;
-      jge loop2;
-
-      mov dword ptr [crc32Ret], edx;
-      mov dword ptr [cimaxRet], edi;
-
-      pop edi;
-      pop esi;
-      pop ebx;
-    }
-#else
-    asm volatile(
-      "pushl %%ebx \n"
-      "pushl %%esi \n"
-      "pushl %%edi \n"
-
-      "movl %0, %%ecx \n"
-      "movl %1, %%eax \n"
-      "movl $0, %%edx \n"
-      "movl $0, %%edi \n"
-      "decl %%eax \n"
-
-      "0: \n"
-      "movl %2, %%ebx \n"
-      "subl $4, %%ebx \n"
-
-      "1: \n"
-      "movl (%%ecx,%%ebx), %%esi \n"
-
-      "cmpl $0x0000000f, %%edi \n"
-      "je  10f \n"
-
-      "pushl %%ecx \n"
-      "movl %%esi, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   2f \n"
-      "movl %%ecx, %%edi \n"
-
-      "2: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $4, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   3f \n"
-      "movl %%ecx, %%edi \n"
-
-      "3: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $8, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   4f \n"
-      "movl %%ecx, %%edi \n"
-
-      "4: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $12, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   5f \n"
-      "movl %%ecx, %%edi \n"
-
-      "5: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $16, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   6f \n"
-      "movl %%ecx, %%edi \n"
-
-      "6: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $20, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   7f \n"
-      "movl %%ecx, %%edi \n"
-
-      "7: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $24, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   8f \n"
-      "movl %%ecx, %%edi \n"
-
-      "8: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $28, %%ecx \n"
-      "andl $0x0000000f, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   9f \n"
-      "movl %%ecx, %%edi \n"
-
-      "9: \n"
-      "popl %%ecx \n"
-
-      "10: \n"
-      "xorl %%ebx, %%esi \n"
-      "roll $4, %%edx \n"
-      "addl %%esi, %%edx \n"
-      "subl $4, %%ebx \n"
-      "jge  1b \n"
-
-      "xorl %%eax, %%esi \n"
-      "addl %%esi, %%edx \n"
-      "addl %3, %%ecx \n"
-      "decl %%eax \n"
-      "jge  0b \n"
-
-      "movl %%edx, %4 \n"
-      "movl %%edi, %5 \n"
-
-      "popl %%edi \n"
-      "popl %%esi \n"
-      "popl %%ebx \n"
-      :
-      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet)
-      : "memory", "cc"
-      );
-#endif
-  } catch(...) {
-    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
-  }
-
-  *crc32 = crc32Ret;
-  *cimax = cimaxRet;
-
-  return 1;
-}
-
-boolean
-TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
-                      uint32* crc32, uint32* cimax)
-{
-  /* NOTE: bytes_per_width must be equal or larger than 4 */
-
-  uint32 crc32Ret = 0;
-  uint32 cimaxRet = 0;
-  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
-
-  /*if (bytes_per_width < 4) return 0;*/
-
-  /* 8bit CI */
-  try {
-#ifdef WIN32
-    __asm {
-      push ebx;
-      push esi;
-      push edi;
-
-      mov ecx, dword ptr [src];
-      mov eax, dword ptr [height];
-      mov edx, 0;
-      mov edi, 0;
-      dec eax;
-
-    loop2:
-      mov ebx, dword ptr [bytes_per_width];
-      sub ebx, 4;
-
-    loop1:
-      mov esi, dword ptr [ecx+ebx];
-
-      cmp edi, 0x000000ff;
-      je findmax0;
-
-      push ecx;
-      mov ecx, esi;
-      and ecx, 0x000000ff;
-      cmp ecx, edi;
-      jb  findmax4;
-      mov edi, ecx;
-
-    findmax4:
-      mov ecx, esi;
-      shr ecx, 8;
-      and ecx, 0x000000ff;
-      cmp ecx, edi;
-      jb  findmax3;
-      mov edi, ecx;
-
-    findmax3:
-      mov ecx, esi;
-      shr ecx, 16;
-      and ecx, 0x000000ff;
-      cmp ecx, edi;
-      jb  findmax2;
-      mov edi, ecx;
-
-    findmax2:
-      mov ecx, esi;
-      shr ecx, 24;
-      and ecx, 0x000000ff;
-      cmp ecx, edi;
-      jb  findmax1;
-      mov edi, ecx;
-
-    findmax1:
-      pop ecx;
-
-    findmax0:
-      xor esi, ebx;
-      rol edx, 4;
-      add edx, esi;
-      sub ebx, 4;
-      jge loop1;
-
-      xor esi, eax;
-      add edx, esi;
-      add ecx, dword ptr [rowStride];
-      dec eax;
-      jge loop2;
-
-      mov dword ptr [crc32Ret], edx;
-      mov dword ptr [cimaxRet], edi;
-
-      pop edi;
-      pop esi;
-      pop ebx;
-    }
-#else
-    asm volatile(
-      "pushl %%ebx \n"
-      "pushl %%esi \n"
-      "pushl %%edi \n"
-
-      "movl %0, %%ecx \n"
-      "movl %1, %%eax \n"
-      "movl $0, %%edx \n"
-      "movl $0, %%edi \n"
-      "decl %%eax \n"
-
-      "0: \n"
-      "movl %2, %%ebx \n"
-      "subl $4, %%ebx \n"
-
-      "1: \n"
-      "movl (%%ecx,%%ebx), %%esi \n"
-
-      "cmpl $0x000000ff, %%edi \n"
-      "je   6f \n"
-
-      "pushl %%ecx \n"
-      "movl %%esi, %%ecx \n"
-      "andl $0x000000ff, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   2f \n"
-      "movl %%ecx, %%edi \n"
-
-      "2: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $8, %%ecx \n"
-      "andl $0x000000ff, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   3f \n"
-      "movl %%ecx, %%edi \n"
-
-      "3: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $16, %%ecx \n"
-      "andl $0x000000ff, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   4f \n"
-      "movl %%ecx, %%edi \n"
-
-      "4: \n"
-      "movl %%esi, %%ecx \n"
-      "shrl $24, %%ecx \n"
-      "andl $0x000000ff, %%ecx \n"
-      "cmpl %%edi, %%ecx \n"
-      "jb   5f \n"
-      "movl %%ecx, %%edi \n"
-
-      "5: \n"
-      "popl %%ecx \n"
-
-      "6: \n"
-      "xorl %%ebx, %%esi \n"
-      "roll $4, %%edx \n"
-      "addl %%esi, %%edx \n"
-      "subl $4, %%ebx \n"
-      "jge  1b \n"
-
-      "xorl %%eax, %%esi \n"
-      "addl %%esi, %%edx \n"
-      "addl %3, %%ecx \n"
-      "decl %%eax \n"
-      "jge  0b \n"
-
-      "movl %%edx, %4 \n"
-      "movl %%edi, %5 \n"
-
-      "popl %%edi \n"
-      "popl %%esi \n"
-      "popl %%ebx \n"
-      :
-      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet)
-      : "memory", "cc"
-      );
-#endif
-  } catch(...) {
-    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
-  }
-
-  *crc32 = crc32Ret;
-  *cimax = cimaxRet;
-
-  return 1;
-}
-
-int
-TxUtil::log2(int num)
-{
-  int i = 0;
-
-#if 1
-  if (!num) return 0;
-#ifdef WIN32
-  __asm {
-    mov eax, dword ptr [num];
-    bsr eax, eax;
-    mov dword ptr [i], eax;
-  }
-#else
-  asm volatile(
-    "movl %0, %%eax \n"
-    "bsrl %%eax, %%eax \n"
-    "movl %%eax, %1 \n"
-    :
-    : "m"(num), "m"(i)
-    : "memory", "cc"
-    );
-#endif
-#else
-  switch (num) {
-    case 1:    return 0;
-    case 2:    return 1;
-    case 4:    return 2;
-    case 8:    return 3;
-    case 16:   return 4;
-    case 32:   return 5;
-    case 64:   return 6;
-    case 128:  return 7;
-    case 256:  return 8;
-    case 512:  return 9;
-    case 1024:  return 10;
-    case 2048:  return 11;
-  }
-#endif
-
-  return i;
-}
-
-int
-TxUtil::grLodLog2(int w, int h)
-{
-  return (w >= h ? log2(w) : log2(h));
-}
-
-int
-TxUtil::grAspectRatioLog2(int w, int h)
-{
-  return (w >= h ? log2(w/h) : -log2(h/w));
-}
-
-int
-TxUtil::getNumberofProcessors()
-{
-  int numcore = 1;
-
-  /* number of logical processors per physical processor */
-  try {
-#ifdef WIN32
-#if 1
-    /* use win32 api */
-    SYSTEM_INFO siSysInfo;
-    ZeroMemory(&siSysInfo, sizeof(SYSTEM_INFO));
-    GetSystemInfo(&siSysInfo);
-    numcore = siSysInfo.dwNumberOfProcessors;
-#else
-    __asm {
-      push ebx;
-
-      mov eax, 1;
-      cpuid;
-      test edx, 0x10000000; /* check HTT */
-      jz uniproc;
-      and ebx, 0x00ff0000;  /* mask logical core counter bit */
-      shr ebx, 16;
-      mov dword ptr [numcore], ebx;
-    uniproc:
-
-      pop ebx;
-    }
-#endif
-#else
-    asm volatile(
-      "pushl %%ebx \n"
-
-      "movl $1, %%eax \n"
-      "cpuid \n"
-      "testl $0x10000000, %%edx \n"
-      "jz 0f \n"
-      "andl $0x00ff0000, %%ebx \n"
-      "shrl $16, %%ebx \n"
-      "movl %%ebx, %0 \n"
-      "0: \n"
-
-      "popl %%ebx \n"
-      :
-      : "m"(numcore)
-      : "memory", "cc"
-      );
-#endif
-  } catch(...) {
-    DBG_INFO(80, L"Error: number of processor detection failed!\n");
-  }
-
-  if (numcore > MAX_NUMCORE) numcore = MAX_NUMCORE;
-
-  DBG_INFO(80, L"Number of processors : %d\n", numcore);
-
-  return numcore;
-}
-
-
-/*
- * Memory buffers for texture manipulations
- ******************************************************************************/
-TxMemBuf::TxMemBuf()
-{
-  int i;
-  for (i = 0; i < 2; i++) {
-    _tex[i] = NULL;
-    _size[i] = 0;
-  }
-}
-
-TxMemBuf::~TxMemBuf()
-{
-  shutdown();
-}
-
-boolean
-TxMemBuf::init(int maxwidth, int maxheight)
-{
-  int i;
-  for (i = 0; i < 2; i++) {
-    if (!_tex[i]) {
-      _tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4);
-      _size[i] = maxwidth * maxheight * 4;
-    }
-
-    if (!_tex[i]) {
-      shutdown();
-      return 0;
-    }
-  }
-  return 1;
-}
-
-void
-TxMemBuf::shutdown()
-{
-  int i;
-  for (i = 0; i < 2; i++) {
-    if (_tex[i]) free(_tex[i]);
-    _tex[i] = NULL;
-    _size[i] = 0;
-  }
-}
-
-uint8*
-TxMemBuf::get(unsigned int num)
-{
-  return ((num < 2) ? _tex[num] : NULL);
-}
-
-uint32
-TxMemBuf::size_of(unsigned int num)
-{
-  return ((num < 2) ? _size[num] : 0);
-}
+
+/*
+ * External libraries
+ ******************************************************************************/
+TxLoadLib::TxLoadLib() /* sets up _tx_compress_dxtn and _tx_compress_fxt1 */
+{
+#ifdef DXTN_DLL /* run-time binding: look both functions up in the "dxtn" library */
+  if (!_dxtnlib)
+    _dxtnlib = LoadLibrary("dxtn");
+
+  if (_dxtnlib) { /* if the library could not be loaded, both pointers are left as they were */
+    if (!_tx_compress_dxtn)
+      _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");
+
+    if (!_tx_compress_fxt1)
+      _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
+  }
+#else /* direct binding to tx_compress_dxtn / fxt1_encode */
+  _tx_compress_dxtn = tx_compress_dxtn;
+  _tx_compress_fxt1 = fxt1_encode;
+
+#endif
+}
+
+TxLoadLib::~TxLoadLib() /* releases the library handle in DXTN_DLL builds; otherwise does nothing */
+{
+#ifdef DXTN_DLL
+  /* free dynamic library */
+  if (_dxtnlib)
+    FreeLibrary(_dxtnlib); /* NOTE(review): _dxtnlib is not reset here - confirm nothing reuses it afterwards */
+#endif
+
+}
+
+fxtCompressTexFuncExt
+TxLoadLib::getfxtCompressTexFuncExt() /* accessor for the stored "fxt1_encode" entry point */
+{
+  return _tx_compress_fxt1; /* returned as-is; no null check is done here */
+}
+
+dxtCompressTexFuncExt
+TxLoadLib::getdxtCompressTexFuncExt() /* accessor for the stored "tx_compress_dxtn" entry point */
+{
+  return _tx_compress_dxtn; /* returned as-is; no null check is done here */
+}
+
+
+/*
+ * Utilities
+ ******************************************************************************/
+uint32
+TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
+{
+  int dataSize = sizeofTx(width, height, format); /* 0 for an unsupported format */
+
+  /* The Adler32 variant below is kept for reference but is disabled;
+   * if something else is better we can simply switch later.
+   */
+  /* return (dataSize ? Adler32(src, dataSize, 1) : 0); */
+
+  /* zlib crc32 over dataSize bytes of src; the result is 0 when dataSize is 0 */
+  return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);
+}
+
+int
+TxUtil::sizeofTx(int width, int height, uint16 format) /* byte size of a width x height texture in the given format */
+{
+  int dataSize = 0; /* stays 0 when the format is not handled below */
+
+  /* a lookup table for the shifts would be better */
+  switch (format) {
+  case GR_TEXFMT_ARGB_CMP_FXT1: /* width rounded up to a multiple of 8, height to a multiple of 4, then halved */
+    dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
+    break;
+  case GR_TEXFMT_ARGB_CMP_DXT1: /* both dimensions rounded up to a multiple of 4, then halved */
+    dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
+    break;
+  case GR_TEXFMT_ARGB_CMP_DXT3:
+  case GR_TEXFMT_ARGB_CMP_DXT5: /* same rounding as DXT1 but not halved */
+    dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
+    break;
+  case GR_TEXFMT_ALPHA_INTENSITY_44:
+  case GR_TEXFMT_ALPHA_8:
+  case GR_TEXFMT_INTENSITY_8:
+  case GR_TEXFMT_P_8: /* one byte per texel */
+    dataSize = width * height;
+    break;
+  case GR_TEXFMT_ARGB_4444:
+  case GR_TEXFMT_ARGB_1555:
+  case GR_TEXFMT_RGB_565:
+  case GR_TEXFMT_ALPHA_INTENSITY_88: /* two bytes per texel */
+    dataSize = (width * height) << 1;
+    break;
+  case GR_TEXFMT_ARGB_8888: /* four bytes per texel */
+    dataSize = (width * height) << 2;
+    break;
+  default:
+    /* unsupported format: log it and fall through to return 0 */
+    DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
+    ;
+  }
+
+  return dataSize;
+}
+
+#if 0 /* unused */
+uint32
+TxUtil::chkAlpha(uint32* src, int width, int height)
+{
+  /* NOTE: src must be ARGB8888
+   * return values
+   * 0x00000000: 8bit alpha
+   * 0x00000001: 1bit alpha
+   * 0xff000001: no alpha
+   */
+  /* (this function sits inside an "#if 0" region and is not compiled) */
+  int _size = width * height;
+  uint32 alpha = 0;
+
+  __asm {
+    mov esi, dword ptr [src];
+    mov ecx, dword ptr [_size];
+    mov ebx, 0xff000000; /* ebx accumulates the AND of the top bytes seen */
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xff000000; /* keep only bits 31..24 */
+    jz  alpha1bit; /* top byte is 0x00 */
+    cmp eax, 0xff000000;
+    je  alpha1bit; /* top byte is 0xff */
+    jmp done; /* any other value: stop, alpha keeps its initial 0 */
+
+  alpha1bit:
+    and ebx, eax;
+    dec ecx; /* bottom-tested: NOTE(review) a _size of 0 would not stop here */
+    jnz tc1_loop;
+
+    or  ebx, 0x00000001; /* all top bytes were 0x00 or 0xff: set bit 0 */
+    mov dword ptr [alpha], ebx;
+
+  done:
+  }
+
+  return alpha;
+}
+#endif
+
+uint32
+TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
+{
+  /* Currently the Rice CRC32; this could be switched to Jabo MD5 or
+   * any other custom checksum.
+   * TODO: use *_HIRESTEXTURE option. */
+
+  /* a NULL texture pointer yields a checksum of 0 */
+  const uint32 crc = src ? RiceCRC32(src, width, height, size, rowStride) : 0;
+  return crc;
+}
+
+uint64
+TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  /* Currently the Rice CRC32; this could be switched to Jabo MD5 or
+   * any other custom checksum.
+   * TODO: use *_HIRESTEXTURE option. */
+  /* The result is 64 bits wide: hi = palette crc32, low = texture crc32 */
+
+  if (!src) return 0;
+
+  uint64 combined = 0;
+
+  if (palette) {
+    uint32 texCrc = 0, maxIndex = 0;
+    int palStride = 0;
+    bool hashed = false;
+    const int texelSize = size & 0xff;
+
+    if (texelSize == 1) {
+      /* 8bit CI */
+      hashed = RiceCRC32_CI8(src, width, height, size, rowStride, &texCrc, &maxIndex) != 0;
+      palStride = 512;
+    } else if (texelSize == 0) {
+      /* 4bit CI */
+      hashed = RiceCRC32_CI4(src, width, height, size, rowStride, &texCrc, &maxIndex) != 0;
+      palStride = 32;
+    }
+    if (hashed) {
+      /* palette crc32 over one row of width maxIndex + 1 goes in the high half */
+      combined = ((uint64)RiceCRC32(palette, maxIndex + 1, 1, 2, palStride) << 32) | (uint64)texCrc;
+    }
+  }
+  /* no palette, or a combined value of 0: use the plain texture crc32 */
+  if (!combined) combined = (uint64)RiceCRC32(src, width, height, size, rowStride);
+  return combined;
+}
+
+/*
+** Computes Adler32 checksum for a stream of data.
+**
+** From the specification found in RFC 1950: (ZLIB Compressed Data Format
+** Specification version 3.3)
+**
+** ADLER32 (Adler-32 checksum) This contains a checksum value of the
+** uncompressed data (excluding any dictionary data) computed according to
+** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
+** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
+**
+** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
+** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
+** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
+** as s2*65536 + s1 in most-significant-byte first (network) order.
+**
+** 8.2. The Adler-32 algorithm 
+**
+** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
+** provides an extremely low probability of undetected errors.
+**
+** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
+** so the modulo operation time is negligible. If the bytes are a, b, c,
+** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
+** unlike the first sum, which is just a checksum. That 65521 is prime is
+** important to avoid a possible large class of two-byte errors that leave
+** the check unchanged. (The Fletcher checksum uses 255, which is not prime
+** and which also makes the Fletcher check insensitive to single byte
+** changes 0 <-> 255.)
+**
+** The sum s1 is initialized to 1 instead of zero to make the length of
+** the sequence part of s2, so that the length does not have to be checked
+** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
+*/
+
+uint32
+TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
+{
+#if 1
+  /* delegate to zlib, continuing from the running value dwAdler32 */
+  return adler32(dwAdler32, data, Len);
+#else
+  /* hand-rolled fallback (compiled out): low half = s1, high half = s2 */
+  uint32 lowSum = dwAdler32 & 0xFFFF;
+  uint32 highSum = (dwAdler32 >> 16) & 0xFFFF;
+  while (Len > 0) {
+    /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+    const int chunk = (Len < 5552 ? Len : 5552);
+    Len -= chunk;
+    for (int left = chunk; left > 0; --left) {
+      lowSum += *data++;
+      highSum += lowSum;
+    }
+    /* 65521 is the largest prime smaller than 65536 */
+    lowSum %= 65521;
+    highSum %= 65521;
+  }
+
+  /* pack the two sums back into one 32-bit value */
+  return (highSum << 16) | lowSum;
+#endif
+}
+
+uint32
+TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
+{
+  /* Chains the three-argument Adler32 over `height` rows, starting from 1.
+   * Each row contributes width * size bytes; rows are rowStride bytes apart. */
+  const uint32 rowBytes = width * size;
+  uint32 running = 1;
+  const uint8* row = src;
+
+  for (int rowsLeft = height; rowsLeft > 0; --rowsLeft, row += rowStride)
+    running = Adler32(row, rowBytes, running);
+
+  return running;
+}
+
+/* Rice CRC32 for hires texture packs */
+/* NOTE: The following is used in Glide64 to calculate the CRC32
+ * for Rice hires texture packs.
+ *
+ * BYTE* addr = (BYTE*)(gfx.RDRAM +
+ *                     rdp.addr[rdp.tiles[tile].t_mem] +
+ *                     (rdp.tiles[tile].ul_t * bpl) +
+ *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
+ * RiceCRC32(addr,
+ *          rdp.tiles[tile].width,
+ *          rdp.tiles[tile].height,
+ *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
+ *          bpl);
+ */
+uint32
+TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
+{
+  /* NOTE: bytes_per_width must be equal or larger than 4; the check below is
+   * commented out, and a smaller value makes the first load offset negative. */
+  uint32 crc32Ret = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/
+  /* Both loops below are bottom-tested, so each body runs at least once. */
+  try {
+#ifdef WIN32
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src]; /* ecx = current row pointer */
+      mov eax, dword ptr [height];
+      mov edx, 0; /* edx = running checksum */
+      dec eax; /* eax = row counter, starts at height - 1 */
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4; /* ebx = offset of the last 32-bit word read in the row */
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+      xor esi, ebx; /* mix the word with its byte offset */
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1; /* continue while the offset is still >= 0 */
+
+      xor esi, eax; /* mix the last mixed word with the row counter */
+      add edx, esi;
+      add ecx, dword ptr [rowStride]; /* advance to the next row */
+      dec eax;
+      jge loop2; /* continue while the row counter is still >= 0 */
+
+      mov dword ptr [crc32Ret], edx;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+      "pushl %%esi \n"
+      "pushl %%edi \n"
+      /* same algorithm as the WIN32 branch, in AT&T syntax */
+      "movl %0, %%ecx \n"
+      "movl %1, %%eax \n"
+      "movl $0, %%edx \n"
+      "decl %%eax \n"
+      /* label 0: start of one row */
+      "0: \n"
+      "movl %2, %%ebx \n"
+      "subl $4, %%ebx \n"
+      /* label 1: one 32-bit word of the row */
+      "1: \n"
+      "movl (%%ecx,%%ebx), %%esi \n"
+      "xorl %%ebx, %%esi \n"
+      "roll $4, %%edx \n"
+      "addl %%esi, %%edx \n"
+      "subl $4, %%ebx \n"
+      "jge  1b \n"
+
+      "xorl %%eax, %%esi \n"
+      "addl %%esi, %%edx \n"
+      "addl %3, %%ecx \n"
+      "decl %%eax \n"
+      "jge  0b \n"
+      /* operand 4 is crc32Ret */
+      "movl %%edx, %4 \n"
+
+      "popl %%edi \n"
+      "popl %%esi \n"
+      "popl %%ebx \n"
+      :
+      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret)
+      : "memory", "cc" /* NOTE(review): crc32Ret is stored to but listed as an input, and eax/ecx/edx are not listed as clobbers - confirm */
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  return crc32Ret;
+}
+
+boolean
+TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax)
+{
+  /* NOTE: bytes_per_width must be equal or larger than 4; the check below is
+   * commented out, and a smaller value makes the first load offset negative. */
+  uint32 crc32Ret = 0;
+  uint32 cimaxRet = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/
+  /* Writes the checksum to *crc32 and the largest 4-bit value seen to *cimax. */
+  /* 4bit CI */
+  try {
+#ifdef WIN32
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src]; /* ecx = current row pointer */
+      mov eax, dword ptr [height];
+      mov edx, 0; /* edx = running checksum */
+      mov edi, 0; /* edi = largest 4-bit value found so far */
+      dec eax; /* eax = row counter, starts at height - 1 */
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4; /* ebx = offset of the last 32-bit word read in the row */
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+
+      cmp edi, 0x0000000f;
+      je findmax0; /* maximum already 0xf: skip the scan of this word */
+
+      push ecx; /* ecx is reused as scratch during the scan */
+      mov ecx, esi;
+      and ecx, 0x0000000f; /* bits 3..0 */
+      cmp ecx, edi;
+      jb  findmax8;
+      mov edi, ecx;
+
+    findmax8:
+      mov ecx, esi;
+      shr ecx, 4; /* bits 7..4 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax7;
+      mov edi, ecx;
+
+    findmax7:
+      mov ecx, esi;
+      shr ecx, 8; /* bits 11..8 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax6;
+      mov edi, ecx;
+
+    findmax6:
+      mov ecx, esi;
+      shr ecx, 12; /* bits 15..12 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax5;
+      mov edi, ecx;
+
+    findmax5:
+      mov ecx, esi;
+      shr ecx, 16; /* bits 19..16 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax4;
+      mov edi, ecx;
+
+    findmax4:
+      mov ecx, esi;
+      shr ecx, 20; /* bits 23..20 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax3;
+      mov edi, ecx;
+
+    findmax3:
+      mov ecx, esi;
+      shr ecx, 24; /* bits 27..24 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax2;
+      mov edi, ecx;
+
+    findmax2:
+      mov ecx, esi;
+      shr ecx, 28; /* bits 31..28 */
+      and ecx, 0x0000000f;
+      cmp ecx, edi;
+      jb  findmax1;
+      mov edi, ecx;
+
+    findmax1:
+      pop ecx; /* restore the row pointer */
+
+    findmax0:
+      xor esi, ebx; /* mix the word with its byte offset */
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1; /* continue while the offset is still >= 0 */
+
+      xor esi, eax; /* mix the last mixed word with the row counter */
+      add edx, esi;
+      add ecx, dword ptr [rowStride]; /* advance to the next row */
+      dec eax;
+      jge loop2; /* continue while the row counter is still >= 0 */
+
+      mov dword ptr [crc32Ret], edx;
+      mov dword ptr [cimaxRet], edi;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+      "pushl %%esi \n"
+      "pushl %%edi \n"
+      /* same algorithm as the WIN32 branch, in AT&T syntax */
+      "movl %0, %%ecx \n"
+      "movl %1, %%eax \n"
+      "movl $0, %%edx \n"
+      "movl $0, %%edi \n"
+      "decl %%eax \n"
+      /* label 0: start of one row */
+      "0: \n"
+      "movl %2, %%ebx \n"
+      "subl $4, %%ebx \n"
+      /* label 1: one 32-bit word of the row */
+      "1: \n"
+      "movl (%%ecx,%%ebx), %%esi \n"
+      /* skip the scan once the maximum is already 0xf */
+      "cmpl $0x0000000f, %%edi \n"
+      "je  10f \n"
+
+      "pushl %%ecx \n"
+      "movl %%esi, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   2f \n"
+      "movl %%ecx, %%edi \n"
+
+      "2: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $4, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   3f \n"
+      "movl %%ecx, %%edi \n"
+
+      "3: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $8, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   4f \n"
+      "movl %%ecx, %%edi \n"
+
+      "4: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $12, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   5f \n"
+      "movl %%ecx, %%edi \n"
+
+      "5: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $16, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   6f \n"
+      "movl %%ecx, %%edi \n"
+
+      "6: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $20, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   7f \n"
+      "movl %%ecx, %%edi \n"
+
+      "7: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $24, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   8f \n"
+      "movl %%ecx, %%edi \n"
+
+      "8: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $28, %%ecx \n"
+      "andl $0x0000000f, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   9f \n"
+      "movl %%ecx, %%edi \n"
+
+      "9: \n"
+      "popl %%ecx \n"
+      /* label 10: checksum step for the current word */
+      "10: \n"
+      "xorl %%ebx, %%esi \n"
+      "roll $4, %%edx \n"
+      "addl %%esi, %%edx \n"
+      "subl $4, %%ebx \n"
+      "jge  1b \n"
+
+      "xorl %%eax, %%esi \n"
+      "addl %%esi, %%edx \n"
+      "addl %3, %%ecx \n"
+      "decl %%eax \n"
+      "jge  0b \n"
+      /* operand 4 is crc32Ret, operand 5 is cimaxRet */
+      "movl %%edx, %4 \n"
+      "movl %%edi, %5 \n"
+
+      "popl %%edi \n"
+      "popl %%esi \n"
+      "popl %%ebx \n"
+      :
+      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet)
+      : "memory", "cc" /* NOTE(review): crc32Ret/cimaxRet are stored to but listed as inputs, and eax/ecx/edx are not listed as clobbers - confirm */
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  *crc32 = crc32Ret;
+  *cimax = cimaxRet;
+  /* always reports success, including after the catch block above */
+  return 1;
+}
+
+boolean
+TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
+                      uint32* crc32, uint32* cimax)
+{
+  /* NOTE: bytes_per_width must be equal or larger than 4; the check below is
+   * commented out, and a smaller value makes the first load offset negative. */
+  uint32 crc32Ret = 0;
+  uint32 cimaxRet = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/
+  /* Writes the checksum to *crc32 and the largest byte value seen to *cimax. */
+  /* 8bit CI */
+  try {
+#ifdef WIN32
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src]; /* ecx = current row pointer */
+      mov eax, dword ptr [height];
+      mov edx, 0; /* edx = running checksum */
+      mov edi, 0; /* edi = largest byte value found so far */
+      dec eax; /* eax = row counter, starts at height - 1 */
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4; /* ebx = offset of the last 32-bit word read in the row */
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+
+      cmp edi, 0x000000ff;
+      je findmax0; /* maximum already 0xff: skip the scan of this word */
+
+      push ecx; /* ecx is reused as scratch during the scan */
+      mov ecx, esi;
+      and ecx, 0x000000ff; /* bits 7..0 */
+      cmp ecx, edi;
+      jb  findmax4;
+      mov edi, ecx;
+
+    findmax4:
+      mov ecx, esi;
+      shr ecx, 8; /* bits 15..8 */
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax3;
+      mov edi, ecx;
+
+    findmax3:
+      mov ecx, esi;
+      shr ecx, 16; /* bits 23..16 */
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax2;
+      mov edi, ecx;
+
+    findmax2:
+      mov ecx, esi;
+      shr ecx, 24; /* bits 31..24 */
+      and ecx, 0x000000ff;
+      cmp ecx, edi;
+      jb  findmax1;
+      mov edi, ecx;
+
+    findmax1:
+      pop ecx; /* restore the row pointer */
+
+    findmax0:
+      xor esi, ebx; /* mix the word with its byte offset */
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1; /* continue while the offset is still >= 0 */
+
+      xor esi, eax; /* mix the last mixed word with the row counter */
+      add edx, esi;
+      add ecx, dword ptr [rowStride]; /* advance to the next row */
+      dec eax;
+      jge loop2; /* continue while the row counter is still >= 0 */
+
+      mov dword ptr [crc32Ret], edx;
+      mov dword ptr [cimaxRet], edi;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    asm volatile(
+      "pushl %%ebx \n"
+      "pushl %%esi \n"
+      "pushl %%edi \n"
+      /* same algorithm as the WIN32 branch, in AT&T syntax */
+      "movl %0, %%ecx \n"
+      "movl %1, %%eax \n"
+      "movl $0, %%edx \n"
+      "movl $0, %%edi \n"
+      "decl %%eax \n"
+      /* label 0: start of one row */
+      "0: \n"
+      "movl %2, %%ebx \n"
+      "subl $4, %%ebx \n"
+      /* label 1: one 32-bit word of the row */
+      "1: \n"
+      "movl (%%ecx,%%ebx), %%esi \n"
+      /* skip the scan once the maximum is already 0xff */
+      "cmpl $0x000000ff, %%edi \n"
+      "je   6f \n"
+
+      "pushl %%ecx \n"
+      "movl %%esi, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   2f \n"
+      "movl %%ecx, %%edi \n"
+
+      "2: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $8, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   3f \n"
+      "movl %%ecx, %%edi \n"
+
+      "3: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $16, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   4f \n"
+      "movl %%ecx, %%edi \n"
+
+      "4: \n"
+      "movl %%esi, %%ecx \n"
+      "shrl $24, %%ecx \n"
+      "andl $0x000000ff, %%ecx \n"
+      "cmpl %%edi, %%ecx \n"
+      "jb   5f \n"
+      "movl %%ecx, %%edi \n"
+
+      "5: \n"
+      "popl %%ecx \n"
+      /* label 6: checksum step for the current word */
+      "6: \n"
+      "xorl %%ebx, %%esi \n"
+      "roll $4, %%edx \n"
+      "addl %%esi, %%edx \n"
+      "subl $4, %%ebx \n"
+      "jge  1b \n"
+
+      "xorl %%eax, %%esi \n"
+      "addl %%esi, %%edx \n"
+      "addl %3, %%ecx \n"
+      "decl %%eax \n"
+      "jge  0b \n"
+      /* operand 4 is crc32Ret, operand 5 is cimaxRet */
+      "movl %%edx, %4 \n"
+      "movl %%edi, %5 \n"
+
+      "popl %%edi \n"
+      "popl %%esi \n"
+      "popl %%ebx \n"
+      :
+      : "m"(src), "m"(height), "m"(bytes_per_width), "m"(rowStride), "m"(crc32Ret), "m"(cimaxRet)
+      : "memory", "cc" /* NOTE(review): crc32Ret/cimaxRet are stored to but listed as inputs, and eax/ecx/edx are not listed as clobbers - confirm */
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  *crc32 = crc32Ret;
+  *cimax = cimaxRet;
+  /* always reports success, including after the catch block above */
+  return 1;
+}
+
+int
+TxUtil::log2(int num)
+{
+  /* Index of the highest set bit of num (floor(log2) for num > 0); 0 for 0. */
+  int i = 0;
+#if 1
+  if (!num) return 0; /* bsr leaves its destination undefined for a 0 source */
+#ifdef WIN32
+  __asm {
+    mov eax, dword ptr [num];
+    bsr eax, eax;
+    mov dword ptr [i], eax;
+  }
+#else
+  /* 'i' is written by the asm, so it is an output operand; letting the
+   * compiler pick the registers also removes the undeclared eax clobber. */
+  asm volatile(
+    "bsrl %1, %0 \n"
+    : "=r"(i)
+    : "rm"(num)
+    : "cc"
+    );
+#endif
+#else
+  switch (num) {
+    case 1:    return 0;
+    case 2:    return 1;
+    case 4:    return 2;
+    case 8:    return 3;
+    case 16:   return 4;
+    case 32:   return 5;
+    case 64:   return 6;
+    case 128:  return 7;
+    case 256:  return 8;
+    case 512:  return 9;
+    case 1024:  return 10;
+    case 2048:  return 11;
+  }
+#endif
+
+  return i;
+}
+
+int
+TxUtil::grLodLog2(int w, int h)
+{ /* log2() of the larger of the two dimensions */
+  return log2(w < h ? h : w);
+}
+
+int
+TxUtil::grAspectRatioLog2(int w, int h)
+{ /* log2() of the integer ratio, negated when h is larger; w and h must be non-zero (integer division) */
+  return (w < h) ? -log2(h/w) : log2(w/h);
+}
+
+int
+TxUtil::getNumberofProcessors()
+{
+  int numcore = 1;
+  /* Win32: processor count reported by GetSystemInfo(). Elsewhere: the cpuid
+   * logical processor count, read only when the HTT flag is set (else 1). */
+  try {
+#ifdef WIN32
+#if 1
+    /* use win32 api */
+    SYSTEM_INFO siSysInfo;
+    ZeroMemory(&siSysInfo, sizeof(SYSTEM_INFO));
+    GetSystemInfo(&siSysInfo);
+    numcore = siSysInfo.dwNumberOfProcessors;
+#else
+    __asm {
+      push ebx;
+
+      mov eax, 1;
+      cpuid;
+      test edx, 0x10000000; /* check HTT */
+      jz uniproc;
+      and ebx, 0x00ff0000;  /* mask logical core counter bit */
+      shr ebx, 16;
+      mov dword ptr [numcore], ebx;
+    uniproc:
+
+      pop ebx;
+    }
+#endif
+#else
+    /* cpuid results come back in registers; nothing is stored from inside
+     * the asm, so the pushl cannot skew a stack-relative operand. */
+    unsigned int cpuEdx = 0, cpuEbx = 0;
+    asm volatile(
+      "pushl %%ebx \n"
+      "movl $1, %%eax \n"
+      "cpuid \n"
+      "movl %%ebx, %1 \n"
+      "popl %%ebx \n"
+      : "=d"(cpuEdx), "=S"(cpuEbx)
+      :
+      : "eax", "ecx", "cc"
+      );
+    if (cpuEdx & 0x10000000) { /* check HTT */
+      /* logical core counter lives in ebx bits 23..16 */
+      numcore = (cpuEbx & 0x00ff0000) >> 16;
+    }
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: number of processor detection failed!\n");
+  }
+
+  if (numcore > MAX_NUMCORE) numcore = MAX_NUMCORE;
+
+  DBG_INFO(80, L"Number of processors : %d\n", numcore);
+
+  return numcore;
+}
+
+
+/*
+ * Memory buffers for texture manipulations
+ ******************************************************************************/
+TxMemBuf::TxMemBuf()
+{
+  /* both buffer slots start out unallocated */
+  _tex[0] = NULL;
+  _tex[1] = NULL;
+  _size[0] = 0;
+  _size[1] = 0;
+}
+
+TxMemBuf::~TxMemBuf()
+{ /* destruction frees any buffers still held, via shutdown() */
+  shutdown();
+}
+
+boolean
+TxMemBuf::init(int maxwidth, int maxheight)
+{
+  /* Ensures both slots hold a buffer of maxwidth * maxheight * 4 bytes; a
+   * slot that is already allocated is left untouched (it is not resized).
+   * Returns 1 on success, 0 if an allocation failed. */
+  for (int slot = 0; slot < 2; ++slot) {
+    if (_tex[slot]) continue;
+    _tex[slot] = (uint8 *)malloc(maxwidth * maxheight * 4);
+    _size[slot] = maxwidth * maxheight * 4;
+    if (_tex[slot]) continue;
+    /* allocation failed: drop everything allocated so far */
+    shutdown();
+    return 0;
+  }
+  return 1;
+}
+
+void
+TxMemBuf::shutdown()
+{
+  /* free both buffers (if any) and
+   * mark the slots as empty again */
+  for (int slot = 0; slot < 2; ++slot) {
+    if (_tex[slot]) { free(_tex[slot]); _tex[slot] = NULL; }
+    _size[slot] = 0;
+  }
+}
+
+uint8*
+TxMemBuf::get(unsigned int num)
+{ /* buffer held in slot 0 or 1; NULL for any other slot number */
+  return (num >= 2) ? NULL : _tex[num];
+}
+
+uint32
+TxMemBuf::size_of(unsigned int num)
+{ /* recorded byte size of slot 0 or 1; 0 for any other slot number */
+  return (num >= 2) ? 0 : _size[num];
+}