zzogl-pg: Part 2 of the re-formatting; ran AStyle over the headers.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2932 96395faa-99c1-11dd-bbfe-3dabce05a288
2010-05-01 22:54:23 +00:00 · 2010-05-01 22:54:23 +00:00 · 12ad5308ed
parent a6c4df49ea
commit 12ad5308ed
12 changed files with 1453 additions and 1123 deletions
--- a/plugins/zzogl-pg/opengl/GS.h
+++ b/plugins/zzogl-pg/opengl/GS.h
@@ -36,6 +36,7 @@ using namespace std;

 class GLWindow
 {
+
 	private:
 #ifdef GL_X11_WINDOW
 		Display *glDisplay;
@@ -248,6 +249,7 @@ extern u8* g_pBasePS2Mem;
 	(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)

 // PS2 vertex
+
 struct VertexGPU
 {
 	// gained from XYZ2, XYZ3, XYZF2, XYZF3,
@@ -264,6 +266,7 @@ struct VertexGPU
 };

 // Almost same with previous, controlled by prim.fst flagf
+
 struct Vertex
 {
 	u16 x, y, f, resv0;		// note: xy is 12d3
@@ -281,7 +284,8 @@ extern int ppf;

 // PSM values
 // PSM types == Texture Storage Format
-enum PSM_value{
+enum PSM_value
+{
 	PSMCT32		= 0,		// 000000
 	PSMCT24		= 1,		// 000001
 	PSMCT16		= 2,		// 000010
@@ -328,7 +332,8 @@ inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);}

 //----------------------- Data from registers -----------------------

-typedef union {
+typedef union
+{
 	s64 SD;
 	u64 UD;
 	s32 SL[2];
@@ -340,7 +345,9 @@ typedef union {
 } reg64;

 /* general purpose regs structs */
-typedef struct {
+
+typedef struct
+{
 	int fbp;
 	int fbw;
 	int fbh;
@@ -349,7 +356,8 @@ typedef struct {
 } frameInfo;

 // Create frame structure from known data
-inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
+inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm)
+{
 	frameInfo frame;
 	frame.fbp = fbp;
 	frame.fbw = fbw;
@@ -359,11 +367,14 @@ inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
 	return frame;
 }

-typedef struct {
+typedef struct 
+{
 	u16 prim;

-	union {
-		struct {
+	union 
+	{
+		struct 
+		{
 			u16 iip : 1;
 			u16 tme : 1;
 			u16 fge : 1;
@@ -380,8 +391,10 @@ typedef struct {

 extern primInfo *prim;

-typedef union {
-	struct {
+typedef union 
+{
+	struct 
+	{
 		u32 ate : 1;
 		u32 atst : 3;
 		u32 aref : 8;
@@ -395,13 +408,15 @@ typedef union {
 	u32 _val;
 } pixTest;

-typedef struct {
+typedef struct
+{
 	int bp;
 	int bw;
 	int psm;
 } bufInfo;

-typedef struct {
+typedef struct
+{
 	int tbp0;
 	int tbw;
 	int cbp;
@@ -432,13 +447,17 @@ union tex_0_info
 		u64 csa : 5;
 		u64 cld : 3;
 	};
+
 	u64 _u64;
 	u32 _u32[2];
 	u16 _u16[4];
 	u8 _u8[8];
 	tex_0_info(u64 data) { _u64 = data; }
+
 	tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; }
+
 	tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; }
+
 	u32 tbw_mult()
 	{
 		if (tbw == 0)
@@ -446,26 +465,34 @@ union tex_0_info
 		else
 			return ((u32)tbw << 6);
 	}
+
 	u32 psm_fix()
 	{
-	//	printf ("psm %d\n", psm);
-		if ( psm == 9 ) return 1;
+		//	printf ("psm %d\n", psm);
+		if (psm == 9) return 1;
+
 		return psm;
 	}
+
 	u32 tw_exp()
 	{
-		if (tw > 10) return (1<<10);
-		return (1<<tw);
+		if (tw > 10) return (1 << 10);
+
+		return (1 << tw);
 	}
+
 	u32 th_exp()
 	{
-		if (th > 10) return (1<<10);
-		return (1<<th);
+		if (th > 10) return (1 << 10);
+
+		return (1 << th);
 	}
+
 	u32 cpsm_fix()
 	{
 		return cpsm & 0xe;
 	}
+
 	u32 csa_fix()
 	{
 		if (cpsm < 2)
@@ -480,7 +507,8 @@ union tex_0_info
 #define TEX_HIGHLIGHT 2
 #define TEX_HIGHLIGHT2 3

-typedef struct {
+typedef struct
+{
 	int lcm;
 	int mxl;
 	int mmag;
@@ -490,7 +518,8 @@ typedef struct {
 	int k;
 } tex1Info;

-typedef struct {
+typedef struct
+{
 	int wms;
 	int wmt;
 	int minu;
@@ -499,24 +528,28 @@ typedef struct {
 	int maxv;
 } clampInfo;

-typedef struct {
+typedef struct
+{
 	int cbw;
 	int cou;
 	int cov;
 } clutInfo;

-typedef struct {
+typedef struct
+{
 	int tbp[3];
 	int tbw[3];
 } miptbpInfo;

-typedef struct {
+typedef struct
+{
 	u16 aem;
 	u8 ta[2];
 	float fta[2];
 } texaInfo;

-typedef struct {
+typedef struct
+{
 	int sx;
 	int sy;
 	int dx;
@@ -524,9 +557,12 @@ typedef struct {
 	int dir;
 } trxposInfo;

-typedef struct {
-	union {
-		struct {
+typedef struct 
+{
+	union 
+	{
+		struct 
+		{
 			u8 a : 2;
 			u8 b : 2;
 			u8 c : 2;
@@ -538,17 +574,20 @@ typedef struct {
 	u8 fix : 8;
 } alphaInfo;

-typedef struct {
+typedef struct
+{
 	u16 zbp;		// u16 address / 64
 	u8 psm;
 	u8 zmsk;
 } zbufInfo;

-typedef struct {
+typedef struct
+{
 	int fba;
 } fbaInfo;

-typedef struct {
+typedef struct
+{
 	Vertex gsvertex[3];
 	u32 rgba;
 	float q;
@@ -593,36 +632,41 @@ extern GSinternal gs;
 static __forceinline u16 RGBA32to16(u32 c)
 {
 	return (u16)((((c) & 0x000000f8) >>  3) |
-				(((c) & 0x0000f800) >>  6) |
-				(((c) & 0x00f80000) >>  9) |
-				(((c) & 0x80000000) >> 16));
+				 (((c) & 0x0000f800) >>  6) |
+				 (((c) & 0x00f80000) >>  9) |
+				 (((c) & 0x80000000) >> 16));
 }

 static __forceinline u32 RGBA16to32(u16 c)
 {
-	return 	(((c) & 0x001f) <<  3) |
-			(((c) & 0x03e0) <<  6) |
-			(((c) & 0x7c00) <<  9) |
-			(((c) & 0x8000) ? 0xff000000 : 0);
+	return	(((c) & 0x001f) <<  3) |
+		   (((c) & 0x03e0) <<  6) |
+		   (((c) & 0x7c00) <<  9) |
+		   (((c) & 0x8000) ? 0xff000000 : 0);
 }

 // converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
 // f is a u16
-static __forceinline u16 Float16ToBYTE(u16 f) {
+static __forceinline u16 Float16ToBYTE(u16 f)
+{
 	//assert( !(f & 0x8000) );
-	if( f & 0x8000 ) return 0;
+	if (f & 0x8000) return 0;
+
+	u16 d = ((((f & 0x3ff) | 0x400) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));

-	u16 d = ((((f&0x3ff)|0x400)*255)>>(10-((f>>10)&0x1f)+15));
 	return d > 255 ? 255 : d;
 }

-static __forceinline u16 Float16ToALPHA(u16 f) {
+static __forceinline u16 Float16ToALPHA(u16 f)
+{
 	//assert( !(f & 0x8000) );
-	if( f & 0x8000 ) return 0;
+	if (f & 0x8000) return 0;

 	// round up instead of down (crash and burn), too much and charlie breaks
-	u16 d = (((((f&0x3ff)|0x400))*255)>>(10-((f>>10)&0x1f)+15));
-	d = (d)>>1;
+	u16 d = (((((f & 0x3ff) | 0x400)) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
+
+	d = (d) >> 1;
+
 	return d > 255 ? 255 : d;
 }

@@ -650,12 +694,14 @@ static __forceinline u16 Float16ToALPHA(u16 f) {

 inline float Clamp(float fx, float fmin, float fmax)
 {
-	if( fx < fmin ) return fmin;
+	if (fx < fmin) return fmin;
+
 	return fx > fmax ? fmax : fx;
 }

 // PSMT16, 16S have shorter color per pixel, also cluted textures with half storage.
-inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
+inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
+{
 	if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1))
 		return true;
 	else
@@ -670,7 +716,7 @@ inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
 static __forceinline int ZZOglGet_tbp0_TexBits(u32 data)
 {
 	//return tex_0_info(data).tbp0;
-	return (data	  ) & 0x3fff;
+	return (data) & 0x3fff;
 }

 // Obtain tbw -- Texture Buffer Width (Texels/64) -- from data, do not multiply to 64. Bits 14-19
@@ -686,6 +732,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
 {
 	//return text_0_info(data).tbw_mult();
 	int result = ZZOglGet_tbw_TexBits(data);
+
 	if (result == 0)
 		return 64;
 	else
@@ -697,7 +744,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
 static __forceinline int ZZOglGet_psm_TexBits(u32 data)
 {
 	//return tex_0_info(data).psm;
-	return 	((data >> 20) & 0x3f);
+	return	((data >> 20) & 0x3f);
 }

 // Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. Fix incorrect psm == 9
@@ -706,7 +753,9 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
 	//return tex_0_info(data).psm_fix();
 	int result = ZZOglGet_psm_TexBits(data) ;
 //	printf ("result %d\n", result);
-	if ( result == 9 ) result = 1;
+
+	if (result == 9) result = 1;
+
 	return result;
 }

@@ -715,7 +764,7 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
 static __forceinline u16 ZZOglGet_tw_TexBits(u32 data)
 {
 	//return tex_0_info(data).tw;
-	return 	((data >> 26) & 0xf);
+	return	((data >> 26) & 0xf);
 }

 // Obtain tw -- Texture Width (Width = TW) -- from data. Width could newer be more than 1024.
@@ -723,8 +772,10 @@ static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data)
 {
 	//return tex_0_info(data).tw_exp();
 	u16 result = ZZOglGet_tw_TexBits(data);
+
 	if (result > 10) result = 10;
-	return (1<<result);
+
+	return (1 << result);
 }

 // TH set at the border of upper and higher words.
@@ -741,8 +792,10 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
 {
 	//return tex_0_info(dataLO, dataHI).th_exp();
 	u16 result = ZZOglGet_th_TexBits(dataLO, dataHI);
+
 	if (result > 10) result = 10;
-	return 	(1<<result);
+
+	return	(1 << result);
 }

 // Tex0Info bits, higher word.
@@ -751,7 +804,7 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
 static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
 {
 	//return tex_0_info(0, data).tcc;
-	return  ((data >>  2) & 0x1);
+	return ((data >>  2) & 0x1);
 }

 // Obtain tfx -- Texture Function (0=modulate, 1=decal, 2=hilight, 3=hilight2) -- from data. Bit 4-5
@@ -759,7 +812,7 @@ static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
 static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
 {
 	//return tex_0_info(0, data).tfx;
-	return  ((data >>  3) & 0x3);
+	return ((data >>  3) & 0x3);
 }

 // Obtain cbp from data -- Clut Buffer Base Pointer (Address/256) -- Bits 5-18
@@ -767,7 +820,7 @@ static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
 static __forceinline int ZZOglGet_cbp_TexBits(u32 data)
 {
 	//return tex_0_info(0, data).cbp;
-	return  ((data >>  5) & 0x3fff);
+	return ((data >>  5) & 0x3fff);
 }

 // Obtain cpsm from data -- Clut pixel Storage Format -- Bits 19-22. 22nd is at no use.
@@ -794,7 +847,7 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
 {
 	//return tex_0_info(0, data).csa_fix();

-	if ((data & 0x700000) == 0 ) // it is cpsm < 2 check
+	if ((data & 0x700000) == 0)  // it is cpsm < 2 check
 		return ((data >> 24) & 0xf);
 	else
 		return ((data >> 24) & 0x1f);
@@ -805,79 +858,81 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
 static __forceinline u8 ZZOglGet_cld_TexBits(u32 data)
 {
 	//return tex_0_info(0, data).cld;
-	return  ((data >> 29) & 0x7);
+	return ((data >> 29) & 0x7);
 }

 //-------------------------- frames
 // FrameInfo bits.
 // Obtain fbp -- frame Buffer Base Pointer (Word Address/2048) -- from data. Bits 0-15
-inline int
-ZZOglGet_fbp_FrameBits(u32 data) {
-	return ((data      ) & 0x1ff);
+inline int ZZOglGet_fbp_FrameBits(u32 data)
+{
+	return ((data) & 0x1ff);
 }

-// So we got adress / 64, henceby frame fbp and tex tbp have the same dimension -- "real adress" is x64.
-inline int
-ZZOglGet_fbp_FrameBitsMult(u32 data) {
+// So we got address / 64, henceby frame fbp and tex tbp have the same dimension -- "real address" is x64.
+inline int ZZOglGet_fbp_FrameBitsMult(u32 data)
+{
 	return (ZZOglGet_fbp_FrameBits(data) << 5);
 }

 // Obtain fbw -- width (Texels/64) -- from data. Bits 16-23
-inline int
-ZZOglGet_fbw_FrameBits(u32 data) {
+inline int ZZOglGet_fbw_FrameBits(u32 data)
+{
 	return ((data >> 16) & 0x3f);
 }

-inline int
-ZZOglGet_fbw_FrameBitsMult(u32 data) {
+inline int ZZOglGet_fbw_FrameBitsMult(u32 data)
+{
 	return (ZZOglGet_fbw_FrameBits(data) << 6);
 }


 // Obtain psm -- Pixel Storage Format -- from data. Bits 24-29.
 // (data & 0x3f000000) >> 24
-inline int
-ZZOglGet_psm_FrameBits(u32 data) {
-	return 	((data >> 24) & 0x3f);
+inline int ZZOglGet_psm_FrameBits(u32 data)
+{
+	return	((data >> 24) & 0x3f);
 }

 // Function for calculating overal height from frame data.
-inline int
-ZZOgl_fbh_Calc (int fbp, int fbw, int psm) {
-	int fbh = ( 1024 * 1024 - 64 * fbp ) / fbw;
+inline int ZZOgl_fbh_Calc(int fbp, int fbw, int psm)
+{
+	int fbh = (1024 * 1024 - 64 * fbp) / fbw;
 	fbh &= ~0x1f;
-	if (PSMT_ISHALF(psm))
-		fbh *= 2;
-	if (fbh > 1024)
-		fbh = 1024;
+
+	if (PSMT_ISHALF(psm)) fbh *= 2;
+	if (fbh > 1024) fbh = 1024;
+
 	return fbh ;
 }
-inline int
-ZZOgl_fbh_Calc (frameInfo frame) {
+
+inline int ZZOgl_fbh_Calc(frameInfo frame)
+{
 	return ZZOgl_fbh_Calc(frame.fbp, frame.fbw, frame.psm);
 }

 // Calculate fbh from data, It does not set in register
-inline int
-ZZOglGet_fbh_FrameBitsCalc (u32 data) {
+inline int ZZOglGet_fbh_FrameBitsCalc(u32 data)
+{
 	int fbh = 0;
 	int fbp = ZZOglGet_fbp_FrameBits(data);
 	int fbw = ZZOglGet_fbw_FrameBits(data);
 	int psm = ZZOglGet_psm_FrameBits(data);
-	if (fbw > 0)
-		fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
+
+	if (fbw > 0) fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
+
 	return fbh ;
 }

 // Obtain fbm -- frame mask -- from data. All higher word.
-inline u32
-ZZOglGet_fbm_FrameBits(u32 data) {
-	return 	(data);
+inline u32 ZZOglGet_fbm_FrameBits(u32 data)
+{
+	return (data);
 }

 // Obtain fbm -- frame mask -- from data. All higher word. Fixed from psm == PCMT24 (without alpha)
-inline u32
-ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
+inline u32 ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI)
+{
 	if (PSMT_BITMODE(ZZOglGet_psm_FrameBits(dataLO)) == 1)
 		return (dataHI | 0xff000000);
 	else
@@ -885,53 +940,51 @@ ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
 }

 // obtain colormask RED
-inline u32
-ZZOglGet_fbmRed_FrameBits(u32 data) {
+inline u32 ZZOglGet_fbmRed_FrameBits(u32 data)
+{
 	return (data & 0xff);
 }

 // obtain colormask Green
-inline u32
-ZZOglGet_fbmGreen_FrameBits(u32 data) {
+inline u32 ZZOglGet_fbmGreen_FrameBits(u32 data)
+{
 	return ((data >> 8) & 0xff);
 }

 // obtain colormask Blue
-inline u32
-ZZOglGet_fbmBlue_FrameBits(u32 data) {
+inline u32 ZZOglGet_fbmBlue_FrameBits(u32 data)
+{
 	return ((data >> 16) & 0xff);
 }

 // obtain colormask Alpha
-inline u32
-ZZOglGet_fbmAlpha_FrameBits(u32 data) {
+inline u32 ZZOglGet_fbmAlpha_FrameBits(u32 data)
+{
 	return ((data >> 24) & 0xff);
 }

 // obtain colormask Alpha
-inline u32
-ZZOglGet_fbmHighByte(u32 data) {
+inline u32 ZZOglGet_fbmHighByte(u32 data)
+{
 	return (!!(data & 0x80000000));
 }

-
-
 //-------------------------- tex0 comparison
 // Check if old and new tex0 registers have only clut difference
-inline bool
-ZZOglAllExceptClutIsSame( u32* oldtex, u32* newtex) {
+inline bool ZZOglAllExceptClutIsSame(u32* oldtex, u32* newtex)
+{
 	return ((oldtex[0] == newtex[0]) && ((oldtex[1] & 0x1f) == (newtex[1] & 0x1f)));
 }

 // Check if the CLUT registers are same, except CLD
-inline bool
-ZZOglClutMinusCLDunchanged( u32* oldtex, u32* newtex) {
+inline bool ZZOglClutMinusCLDunchanged(u32* oldtex, u32* newtex)
+{
 	return ((oldtex[1] & 0x1fffffe0) == (newtex[1] & 0x1fffffe0));
 }

 // Check if CLUT storage mode is not changed (CSA, CSM and CSPM)
-inline bool
-ZZOglClutStorageUnchanged( u32* oldtex, u32* newtex) {
+inline bool ZZOglClutStorageUnchanged(u32* oldtex, u32* newtex)
+{
 	return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000));
 }

--- a/plugins/zzogl-pg/opengl/GifTransfer.h
+++ b/plugins/zzogl-pg/opengl/GifTransfer.h
@@ -39,29 +39,33 @@ union GIFTag
 {
 	u64 ai64[2];
 	u32 ai32[4];
+
 	struct
 	{
-		u32 NLOOP:15;
-		u32 EOP:1;
-		u32 _PAD1:16;
-		u32 _PAD2:14;
-		u32 PRE:1;
-		u32 PRIM:11;
-		u32 FLG:2; // enum GIF_FLG
-		u32 NREG:4;
-		u64 REGS:64;
+		u32 NLOOP : 15;
+		u32 EOP : 1;
+		u32 _PAD1 : 16;
+		u32 _PAD2 : 14;
+		u32 PRE : 1;
+		u32 PRIM : 11;
+		u32 FLG : 2; // enum GIF_FLG
+		u32 NREG : 4;
+		u64 REGS : 64;
 	};
+
 	void set(u32 *data)
 	{
-		for(int i = 0; i <= 3; i++)
+		for (int i = 0; i <= 3; i++)
 		{
 			ai32[i] = data[i];
 		}
 	}
+
 	GIFTag(u32 *data)
 	{
 		set(data);
 	}
+
 	GIFTag(){ ai64[0] = 0; ai64[1] = 0; }
 };

@@ -101,13 +105,12 @@ typedef struct
 		// Hmm....
 		nreg	= tag.NREG << 2;
 		if (nreg == 0) nreg = 64;
-
 		regs = tag.REGS;
 		reg = 0;

-        //      ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
-        //                      data[3], data[2], data[1], data[0],
-        //                      path->eop, path->nloop, mode, path->nreg, tag.PRE);
+		//      ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
+		//                      data[3], data[2], data[1], data[0],
+		//                      path->eop, path->nloop, mode, path->nreg, tag.PRE);
 	}

 	u32 GetReg()
@@ -124,49 +127,48 @@ typedef struct
 			reg = 0;
 			nloop--;

-			if (nloop == 0)
-			{
-				return false;
-			}
+			if (nloop == 0) return false;
 		}
+
 		return true;
 	}
+
 #else
-        void setTag(u32 *data)
-        {
-                tag.set(data);
+	void setTag(u32 *data)
+	{
+		tag.set(data);

-                nloop   = tag.NLOOP;
-                eop     = tag.EOP;
-                u32 tagpre              = tag.PRE;
-                u32 tagprim             = tag.PRIM;
-                u32 tagflg              = tag.FLG;
+		nloop   = tag.NLOOP;
+		eop     = tag.EOP;
+		u32 tagpre              = tag.PRE;
+		u32 tagprim             = tag.PRIM;
+		u32 tagflg              = tag.FLG;

-                // Hmm....
-                nreg    = tag.NREG << 2;
-                if (nreg == 0) nreg = 64;
+		// Hmm....
+		nreg    = tag.NREG << 2;
+		if (nreg == 0) nreg = 64;

-        //      ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
-        //                      data[3], data[2], data[1], data[0],
-        //                      path->eop, path->nloop, tagflg, path->nreg, tagpre);
+		//      ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
+		//                      data[3], data[2], data[1], data[0],
+		//                      path->eop, path->nloop, tagflg, path->nreg, tagpre);

-                mode = tagflg;
+		mode = tagflg;

-                switch (mode)
-                {
-					case GIF_FLG_PACKED:
-						regs = *(u64 *)(data+2);
-						regn = 0;
-						if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);
+		switch (mode)
+		{
+			case GIF_FLG_PACKED:
+				regs = *(u64 *)(data + 2);
+				regn = 0;
+				if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);
+				break;

-						break;
+			case GIF_FLG_REGLIST:
+				regs = *(u64 *)(data + 2);
+				regn = 0;
+				break;
+		}
+	}

-					case GIF_FLG_REGLIST:
-						regs = *(u64 *)(data+2);
-						regn = 0;
-						break;
-                }
-        }
 #endif
 } pathInfo;

--- a/plugins/zzogl-pg/opengl/Mem.h
+++ b/plugins/zzogl-pg/opengl/Mem.h
@@ -23,9 +23,9 @@
 #include <vector>

 // works only when base is a power of 2
-static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val)+(base-1))&~(base-1)); }
-static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base-1)); }
-static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base-1)); }
+static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val) + (base - 1))&~(base - 1)); }
+static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base - 1)); }
+static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base - 1)); }

 // d3d texture dims
 const int BLOCK_TEXWIDTH = 128;
@@ -33,13 +33,12 @@ const int BLOCK_TEXHEIGHT = 512;

 extern PCSX2_ALIGNED16(u32 tempblock[64]);

-
-typedef u32 ( *_getPixelAddress)(int x, int y, u32 bp, u32 bw);
-typedef u32 (*_getPixelAddress_0)(int x, int y, u32 bw);
+typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
+typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
 typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
 typedef void (*_writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
-typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
-typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
+typedef u32(*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
+typedef u32(*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
 typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
 typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
 typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
@@ -53,6 +52,7 @@ enum Psm_Size

 // Both of the following structs should probably be local class variables or in a namespace,
 // but this works for the moment.
+
 struct TransferData
 {
 	// Signed because Visual C++ is weird.
@@ -88,6 +88,7 @@ struct TransferFuncts
 };

 // rest not visible externally
+
 struct BLOCK
 {
 	BLOCK() { memset(this, 0, sizeof(BLOCK)); }
@@ -142,14 +143,14 @@ extern u32 g_pageTable4[128][128];

 static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
 	u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
 	u32 word = basepage * 2048 + g_pageTable32[y&31][x&63];
 	return word;
 }
@@ -165,70 +166,70 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)

 static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = bp * 128 + basepage * 4096 + g_pageTable16[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = basepage * 4096 + g_pageTable16[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = bp * 128 + basepage * 4096 + g_pageTable16S[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
+	u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
 	u32 word = bp * 256 + basepage * 8192 + g_pageTable8[y&63][x&127];
 	return word;
 }

 static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
+	u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
 	u32 word = basepage * 8192 + g_pageTable8[y&63][x&127];
 	return word;
 }

 static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
+	u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
 	u32 word = bp * 512 + basepage * 16384 + g_pageTable4[y&127][x&127];
 	return word;
 }

 static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
+	u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
 	u32 word = basepage * 16384 + g_pageTable4[y&127][x&127];
 	return word;
 }

 static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
 	u32 word = bp * 64 + basepage * 2048 + g_pageTable32Z[y&31][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
 	u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63];
 	return word;
 }
@@ -238,28 +239,28 @@ static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)

 static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = bp * 128 + basepage * 4096 + g_pageTable16Z[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = bp * 128 + basepage * 4096 + g_pageTable16SZ[y&63][x&63];
 	return word;
 }

 static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
 {
-	u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
+	u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
 	u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63];
 	return word;
 }
@@ -276,9 +277,11 @@ static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32

 static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
 {
-	u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
-	u8 *pix = (u8*)&pixel;
-	buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
+	u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
+	u8 *pix = (u8*) & pixel;
+	buf[0] = pix[0];
+	buf[1] = pix[1];
+	buf[2] = pix[2];
 }

 static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@@ -305,20 +308,21 @@ static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 b
 {
 	u32 addr = getPixelAddress4(x, y, bp, bw);
 	u8 pix = ((u8*)pmem)[addr/2];
-	if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
+
+	if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
 	else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
 }

 static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
 {
-	u8 *p = (u8*)pmem + 4*getPixelAddress4HL(x, y, bp, bw)+3;
+	u8 *p = (u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3;
 	*p = (*p & 0xf0) | pixel;
 }

 static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
 {
-	u8 *p = (u8*)pmem + 4*getPixelAddress4HH(x, y, bp, bw)+3;
-	*p = (*p & 0x0f) | (pixel<<4);
+	u8 *p = (u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3;
+	*p = (*p & 0x0f) | (pixel << 4);
 }

 static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@@ -328,9 +332,11 @@ static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32

 static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
 {
-	u8 *buf = (u8*)pmem + 4*getPixelAddress32Z(x, y, bp, bw);
-	u8 *pix = (u8*)&pixel;
-	buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
+	u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z(x, y, bp, bw);
+	u8 *pix = (u8*) & pixel;
+	buf[0] = pix[0];
+	buf[1] = pix[1];
+	buf[2] = pix[2];
 }

 static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@@ -380,20 +386,22 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
 {
 	u32 addr = getPixelAddress4(x, y, bp, bw);
 	u8 pix = ((const u8*)pmem)[addr/2];
+
 	if (addr & 0x1)
-		 return pix >> 4;
-	else return pix & 0xf;
+		return pix >> 4;
+	else 
+		return pix & 0xf;
 }

 static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
 {
-	const u8 *p = (const u8*)pmem+4*getPixelAddress4HL(x, y, bp, bw)+3;
+	const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3;
 	return *p & 0x0f;
 }

 static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw)
 {
-	const u8 *p = (const u8*)pmem+4*getPixelAddress4HH(x, y, bp, bw) + 3;
+	const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3;
 	return *p >> 4;
 }

@@ -430,9 +438,11 @@ static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u3

 static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
 {
-	u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
-	u8 *pix = (u8*)&pixel;
-	buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
+	u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
+	u8 *pix = (u8*) & pixel;
+	buf[0] = pix[0];
+	buf[1] = pix[1];
+	buf[2] = pix[2];
 }

 static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@@ -459,20 +469,21 @@ static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32
 {
 	u32 addr = getPixelAddress4_0(x, y, bw);
 	u8 pix = ((u8*)pmem)[addr/2];
-	if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
+
+	if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
 	else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
 }

 static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
 {
-	u8 *p = (u8*)pmem + 4*getPixelAddress4HL_0(x, y, bw)+3;
+	u8 *p = (u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
 	*p = (*p & 0xf0) | pixel;
 }

 static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
 {
-	u8 *p = (u8*)pmem + 4*getPixelAddress4HH_0(x, y, bw)+3;
-	*p = (*p & 0x0f) | (pixel<<4);
+	u8 *p = (u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
+	*p = (*p & 0x0f) | (pixel << 4);
 }

 static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@@ -482,9 +493,11 @@ static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u

 static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
 {
-	u8 *buf = (u8*)pmem + 4*getPixelAddress32Z_0(x, y, bw);
-	u8 *pix = (u8*)&pixel;
-	buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
+	u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
+	u8 *pix = (u8*) & pixel;
+	buf[0] = pix[0];
+	buf[1] = pix[1];
+	buf[2] = pix[2];
 }

 static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@@ -534,6 +547,7 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
 {
 	u32 addr = getPixelAddress4_0(x, y, bw);
 	u8 pix = ((const u8*)pmem)[addr/2];
+
 	if (addr & 0x1)
 		return pix >> 4;
 	else
@@ -542,13 +556,13 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)

 static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
 {
-	const u8 *p = (const u8*)pmem+4*getPixelAddress4HL_0(x, y, bw)+3;
+	const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
 	return *p & 0x0f;
 }

 static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
 {
-	const u8 *p = (const u8*)pmem+4*getPixelAddress4HH_0(x, y, bw) + 3;
+	const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
 	return *p >> 4;
 }

--- a/plugins/zzogl-pg/opengl/Mem_Transmit.h
+++ b/plugins/zzogl-pg/opengl/Mem_Transmit.h
@ -14,57 +14,60 @@ extern u8* pstart;
 template <class T>
 static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
 {
-	assert( (nSize%widthlimit) == 0 && widthlimit <= 4 );
-	if ((gs.imageEndX-gs.trxpos.dx) % widthlimit)
+	assert((nSize % widthlimit) == 0 && widthlimit <= 4);
+
+	if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
 	{
 		// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);

-		for(; tempY < endY; ++tempY)
+		for (; tempY < endY; ++tempY)
 		{
-			for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
+			for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
 			{
 				/* write as many pixel at one time as possible */
-				wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
+				wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
 			}
 		}
 	}
-	for(; tempY < endY; ++tempY)
+
+	for (; tempY < endY; ++tempY)
 	{
-		for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
+		for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
 		{

 			/* write as many pixel at one time as possible */
-			if( nSize < widthlimit ) return NULL;
+			if (nSize < widthlimit) return NULL;

-			wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
+			wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);

-			if( widthlimit > 1 )
+			if (widthlimit > 1)
 			{
-				wp(pstart, (tempX+1)%2048, tempY%2048, buf[1], gs.dstbuf.bw);
+				wp(pstart, (tempX + 1) % 2048, tempY % 2048, buf[1], gs.dstbuf.bw);

-				if( widthlimit > 2 )
+				if (widthlimit > 2)
 				{
-					wp(pstart, (tempX+2)%2048, tempY%2048, buf[2], gs.dstbuf.bw);
+					wp(pstart, (tempX + 2) % 2048, tempY % 2048, buf[2], gs.dstbuf.bw);

-					if( widthlimit > 3 )
+					if (widthlimit > 3)
 					{
-						wp(pstart, (tempX+3)%2048, tempY%2048, buf[3], gs.dstbuf.bw);
+						wp(pstart, (tempX + 3) % 2048, tempY % 2048, buf[3], gs.dstbuf.bw);
 					}
 				}
 			}
 		}

-		if ( tempX >= gs.imageEndX )
+		if (tempX >= gs.imageEndX)
 		{
 			assert(tempX == gs.imageEndX);
 			tempX = gs.trxpos.dx;
 		}
 		else
 		{
-			assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 );
+			assert(gs.imageTransfer == -1 || nSize*sizeof(T) / 4 == 0);
 			return NULL;
 		}
 	}
+
 	return buf;
 }

@ -72,47 +75,14 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
 template <class T>
 static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
 {
-	if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit))
+	if (widthlimit != 8 || ((gs.imageEndX - gs.trxpos.dx) % widthlimit))
 	{
 		//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
-		for(; tempY < endY; ++tempY)
+		for (; tempY < endY; ++tempY)
 		{
-			for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
+			for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
 			{
-				wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf), gs.dstbuf.bw);
-			}
-
-			if( tempX >= gs.imageEndX )
-			{
-				assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
-				tempX = gs.trxpos.dx;
-			}
-			else
-			{
-				assert( gs.imageTransfer == -1 || nSize == 0 );
-				return NULL;
-			}
-		}
-	}
-	else
-	{
-		assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 );
-		for(; tempY < endY; ++tempY)
-		{
-			for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3*widthlimit)
-			{
-				if (nSize < widthlimit) return NULL;
-
-				/* write as many pixel at one time as possible */
-
-				wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf+0), gs.dstbuf.bw);
-				wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(buf+3), gs.dstbuf.bw);
-				wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(buf+6), gs.dstbuf.bw);
-				wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(buf+9), gs.dstbuf.bw);
-				wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(buf+12), gs.dstbuf.bw);
-				wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(buf+15), gs.dstbuf.bw);
-				wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(buf+18), gs.dstbuf.bw);
-				wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(buf+21), gs.dstbuf.bw);
+				wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf), gs.dstbuf.bw);
 			}

 			if (tempX >= gs.imageEndX)
@ -122,18 +92,55 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
 			}
 			else
 			{
-				if ( nSize < 0 )
-				{
-					/* extracted too much */
-					assert( (nSize%3)==0 && nSize > -24 );
-					tempX += nSize/3;
-					nSize = 0;
-				}
-				assert( gs.imageTransfer == -1 || nSize == 0 );
+				assert(gs.imageTransfer == -1 || nSize == 0);
 				return NULL;
 			}
 		}
 	}
+	else
+	{
+		assert(/*(nSize%widthlimit) == 0 &&*/ widthlimit == 8);
+
+		for (; tempY < endY; ++tempY)
+		{
+			for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
+			{
+				if (nSize < widthlimit) return NULL;
+
+				/* write as many pixel at one time as possible */
+
+				wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf + 0), gs.dstbuf.bw);
+				wp(pstart, (tempX + 1) % 2048, tempY % 2048, *(u32*)(buf + 3), gs.dstbuf.bw);
+				wp(pstart, (tempX + 2) % 2048, tempY % 2048, *(u32*)(buf + 6), gs.dstbuf.bw);
+				wp(pstart, (tempX + 3) % 2048, tempY % 2048, *(u32*)(buf + 9), gs.dstbuf.bw);
+				wp(pstart, (tempX + 4) % 2048, tempY % 2048, *(u32*)(buf + 12), gs.dstbuf.bw);
+				wp(pstart, (tempX + 5) % 2048, tempY % 2048, *(u32*)(buf + 15), gs.dstbuf.bw);
+				wp(pstart, (tempX + 6) % 2048, tempY % 2048, *(u32*)(buf + 18), gs.dstbuf.bw);
+				wp(pstart, (tempX + 7) % 2048, tempY % 2048, *(u32*)(buf + 21), gs.dstbuf.bw);
+			}
+
+			if (tempX >= gs.imageEndX)
+			{
+				assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
+				tempX = gs.trxpos.dx;
+			}
+			else
+			{
+				if (nSize < 0)
+				{
+					/* extracted too much */
+					assert((nSize % 3) == 0 && nSize > -24);
+					tempX += nSize / 3;
+					nSize = 0;
+				}
+
+				assert(gs.imageTransfer == -1 || nSize == 0);
+
+				return NULL;
+			}
+		}
+	}
+
 	return buf;
 }

@ -141,73 +148,84 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
 template <class T>
 static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
 {
-	for(; tempY < endY; ++tempY)
+	for (; tempY < endY; ++tempY)
 	{
-		for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
+		for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
 		{
 			/* write as many pixel at one time as possible */
-			wp(pstart, tempX%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
-			wp(pstart, (tempX+1)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
+			wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
+			wp(pstart, (tempX + 1) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
 			buf++;
-			if ( widthlimit > 2 )
+
+			if (widthlimit > 2)
 			{
-				wp(pstart, (tempX+2)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
-				wp(pstart, (tempX+3)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
+				wp(pstart, (tempX + 2) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
+				wp(pstart, (tempX + 3) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
 				buf++;

-				if( widthlimit > 4 )
+				if (widthlimit > 4)
 				{
-					wp(pstart, (tempX+4)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
-					wp(pstart, (tempX+5)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
+					wp(pstart, (tempX + 4) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
+					wp(pstart, (tempX + 5) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
 					buf++;

-					if( widthlimit > 6 )
+					if (widthlimit > 6)
 					{
-						wp(pstart, (tempX+6)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
-						wp(pstart, (tempX+7)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
+						wp(pstart, (tempX + 6) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
+						wp(pstart, (tempX + 7) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
 						buf++;
 					}
 				}
 			}
 		}

-		if ( tempX >= gs.imageEndX )
+		if (tempX >= gs.imageEndX)
 		{
 			tempX = gs.trxpos.dx;
 		}
 		else
 		{
-			assert( gs.imageTransfer == -1 || (nSize/32) == 0 );
+			assert(gs.imageTransfer == -1 || (nSize / 32) == 0);
 			return NULL;
 		}
 	}
+
 	return buf;
 }

 template <class T>
- static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
- {
- 	switch (data.psm)
- 	{
- 		case PSM_: return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
- 		case PSM_4_: return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
- 		case PSM_24_: return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
- 	}
- 	assert(0);
- 	return NULL;
- }
+static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
+{
+	switch (data.psm)
+	{
+		case PSM_:
+			return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
+
+		case PSM_4_:
+			return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
+
+		case PSM_24_:
+			return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
+	}
+
+	assert(0);
+
+	return NULL;
+}

 template <class T>
 static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
 {
-	for(u32 tempi = 0; tempi < blockheight; ++tempi)
+	for (u32 tempi = 0; tempi < blockheight; ++tempi)
 	{
-		for(tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
+		for (tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
 		{
-			wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0], gs.dstbuf.bw);
+			wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
 		}
+
 		buf += pitch - fracX;
 	}
+
 	return buf;
 }

@ -215,14 +233,16 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
 template <class T>
 static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
 {
-	for(u32 tempi = 0; tempi < blockheight; ++tempi)
+	for (u32 tempi = 0; tempi < blockheight; ++tempi)
 	{
-		for(tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
+		for (tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
 		{
-			wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)buf, gs.dstbuf.bw);
+			wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
 		}
-		buf += 3*(pitch-fracX);
+
+		buf += 3 * (pitch - fracX);
 	}
+
 	return buf;
 }

@ -230,30 +250,39 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
 template <class T>
 static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
 {
-	for(u32 tempi = 0; tempi < blockheight; ++tempi)
+	for (u32 tempi = 0; tempi < blockheight; ++tempi)
 	{
-		for(tempX = startX; tempX < gs.imageEndX; tempX+=2, buf++)
+		for (tempX = startX; tempX < gs.imageEndX; tempX += 2, buf++)
 		{
-			wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0]&0x0f, gs.dstbuf.bw);
-			wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, buf[0]>>4, gs.dstbuf.bw);
+			wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
+			wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);
 		}
-		buf += (pitch-fracX)/2;
+
+		buf += (pitch - fracX) / 2;
 	}
+
 	return buf;
 }

 template <class T>
- static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
- {
- 	switch (data.psm)
- 	{
- 		case PSM_: return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
- 		case PSM_4_: return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
- 		case PSM_24_: return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
- 	}
- 	assert(0);
- 	return NULL;
- }
+static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
+{
+	switch (data.psm)
+	{
+		case PSM_:
+			return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
+
+		case PSM_4_:
+			return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
+
+		case PSM_24_:
+			return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
+	}
+
+	assert(0);
+
+	return NULL;
+}

 // calculate pitch in source buffer
 static __forceinline u32 TransPitch(u32 pitch, u32 size)
--- a/plugins/zzogl-pg/opengl/Util.h
+++ b/plugins/zzogl-pg/opengl/Util.h
@ -70,21 +70,22 @@ extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file
 // declare linux equivalents
 static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align)
 {
-	assert( align < 0x10000 );
-	char* p = (char*)malloc(size+align);
-	int off = 2+align - ((int)(uptr)(p+2) % align);
+	assert(align < 0x10000);
+	char* p = (char*)malloc(size + align);
+	int off = 2 + align - ((int)(uptr)(p + 2) % align);

 	p += off;
-	*(u16*)(p-2) = off;
+	*(u16*)(p - 2) = off;

 	return p;
 }

 static __forceinline void pcsx2_aligned_free(void* pmem)
 {
-	if( pmem != NULL ) {
+	if (pmem != NULL)
+	{
 		char* p = (char*)pmem;
-		free(p - (int)*(u16*)(p-2));
+		free(p - (int)*(u16*)(p - 2));
 	}
 }

@ -98,7 +99,7 @@ inline unsigned long timeGetTime()
 	timeb t;
 	ftime(&t);

-	return (unsigned long)(t.time*1000+t.millitm);
+	return (unsigned long)(t.time*1000 + t.millitm);
 }

 struct RECT
@ -113,20 +114,24 @@ struct RECT
 #define min(a,b)			(((a) < (b)) ? (a) : (b))


-typedef struct {
+typedef struct
+{
 	int x, y, w, h;
 } Rect;

-typedef struct {
+typedef struct
+{
 	int x, y;
 } Point;

-typedef struct {
+typedef struct
+{
 	int x0, y0;
 	int x1, y1;
 } Rect2;

-typedef struct {
+typedef struct
+{
 	int x, y, c;
 } PointC;

@ -145,6 +150,7 @@ typedef struct {
 #define GSOPTION_LOADED		0x8000

 //Configuration values.
+
 typedef struct
 {
 	u8 mrtdepth; // write color in render target
@ -227,18 +233,18 @@ extern void __LogToConsole(const char *fmt, ...);

 namespace ZZLog
 {
-	extern void Message(const char *fmt, ...);
-	extern void Log(const char *fmt, ...);
-	extern void WriteToConsole(const char *fmt, ...);
-	extern void Print(const char *fmt, ...);
-	
-	extern void Greg_Log(const char *fmt, ...);
-	extern void Prim_Log(const char *fmt, ...);
-	extern void GS_Log(const char *fmt, ...);
-	
-	extern void Debug_Log(const char *fmt, ...);
-	extern void Warn_Log(const char *fmt, ...);
-	extern void Error_Log(const char *fmt, ...);
+extern void Message(const char *fmt, ...);
+extern void Log(const char *fmt, ...);
+extern void WriteToConsole(const char *fmt, ...);
+extern void Print(const char *fmt, ...);
+
+extern void Greg_Log(const char *fmt, ...);
+extern void Prim_Log(const char *fmt, ...);
+extern void GS_Log(const char *fmt, ...);
+
+extern void Debug_Log(const char *fmt, ...);
+extern void Warn_Log(const char *fmt, ...);
+extern void Error_Log(const char *fmt, ...);
 };

 #define REG64(name) \
@ -247,14 +253,14 @@ union name			\
 	u64 i64;		\
 	u32 ai32[2];	\
 	struct {		\
-
+ 
 #define REG128(name)\
 union name			\
 {					\
 	u64 ai64[2];	\
 	u32 ai32[4];	\
 	struct {		\
-
+ 
 #define REG64_(prefix, name) REG64(prefix##name)
 #define REG128_(prefix, name) REG128(prefix##name)

@ -266,13 +272,13 @@ union name			\
 {					\
 	u64 i64;		\
 	u32 ai32[2];	\
-
+ 
 #define REG128_SET(name)\
 union name			\
 {					\
 	u64 ai64[2];	\
 	u32 ai32[4];	\
-
+ 
 #define REG_SET_END };

 extern void LoadConfig();
@ -310,16 +316,18 @@ static __forceinline u64 GetTickFrequency()

 static __forceinline u64 GetCPUTicks()
 {
+
 	struct timeval t;
 	gettimeofday(&t, NULL);
-	return ((u64)t.tv_sec*GetTickFrequency())+t.tv_usec;
+	return ((u64)t.tv_sec*GetTickFrequency()) + t.tv_usec;
 }
+
 #else
 static __aligned16 LARGE_INTEGER lfreq;

 static __forceinline void InitCPUTicks()
 {
-	QueryPerformanceFrequency( &lfreq );
+	QueryPerformanceFrequency(&lfreq);
 }

 static __forceinline u64 GetTickFrequency()
@ -330,42 +338,47 @@ static __forceinline u64 GetTickFrequency()
 static __forceinline u64 GetCPUTicks()
 {
 	LARGE_INTEGER count;
-	QueryPerformanceCounter( &count );
+	QueryPerformanceCounter(&count);
 	return count.QuadPart;
 }
+
 #endif

 template <typename T>
+
 class CInterfacePtr
 {
-public:
-	inline CInterfacePtr() : ptr(NULL) {}
-	inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if ( ptr != NULL ) ptr->AddRef(); }
-	inline ~CInterfacePtr() { if( ptr != NULL ) ptr->Release(); }

-	inline T* operator* () { assert( ptr != NULL); return *ptr; }
-	inline T* operator->() { return ptr; }
-	inline T* get() { return ptr; }
+	public:
+		inline CInterfacePtr() : ptr(NULL) {}
+		inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if (ptr != NULL) ptr->AddRef(); }
+		inline ~CInterfacePtr() { if (ptr != NULL) ptr->Release(); }
+		inline T* operator*() { assert(ptr != NULL); return *ptr; }
+		inline T* operator->() { return ptr; }
+		inline T* get() { return ptr; }

-	inline void release() {
-		if( ptr != NULL ) { ptr->Release(); ptr = NULL; }
-	}
+		inline void release()
+		{
+			if (ptr != NULL) { ptr->Release(); ptr = NULL; }
+		}

-	inline operator T*() { return ptr; }
+		inline operator T*() { return ptr; }
+		inline bool operator==(T* rhs) { return ptr == rhs; }
+		inline bool operator!=(T* rhs) { return ptr != rhs; }

-	inline bool operator==(T* rhs) { return ptr == rhs; }
-	inline bool operator!=(T* rhs) { return ptr != rhs; }
+		inline CInterfacePtr& operator= (T* newptr)
+		{
+			if (ptr != NULL) ptr->Release();

-	inline CInterfacePtr& operator= (T* newptr) {
-		if( ptr != NULL ) ptr->Release();
-		ptr = newptr;
+			ptr = newptr;

-		if( ptr != NULL ) ptr->AddRef();
-		return *this;
-	}
+			if (ptr != NULL) ptr->AddRef();

-private:
-	T* ptr;
+			return *this;
+		}
+
+	private:
+		T* ptr;
 };


@ -380,24 +393,25 @@ void DVProfClear();						// clears all the profilers

 class DVProfileFunc
 {
-public:
-	u32 dwUserData;
-	DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; }
-	DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
-	~DVProfileFunc() { DVProfEnd(dwUserData); }
+	public:
+		u32 dwUserData;
+		DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; }
+		DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
+		~DVProfileFunc() { DVProfEnd(dwUserData); }
 };

 #else

 class DVProfileFunc
 {
-public:
-	u32 dwUserData;
-	static __forceinline DVProfileFunc(char* pname) {}
-	static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { }
-	~DVProfileFunc() {}
+
+	public:
+		u32 dwUserData;
+		static __forceinline DVProfileFunc(char* pname) {}
+		static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { }
+		~DVProfileFunc() {}
 };

 #endif
-        
+
 #endif // UTIL_H_INCLUDED
--- a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp
+++ b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp
@ -584,7 +584,7 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
 	return false;
 }

-// First try to draw frame from targets. It's
+// First, try to draw the frame from the render targets.
 inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
 {
 	// get the start and end addresses of the buffer
@ -662,9 +662,15 @@ inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
 // The same as the previous, but from memory.
 // If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
 // this is the function that does it.
-inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int interlace, int bInterlace)
+inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
 {
-
+	// get the start and end addresses of the buffer
+	int bpp = RenderGetBpp(texframe.psm);
+	GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
+	
+	int start, end;
+	GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
+	
 	for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
 	{
 		(*it)->Resolve();
@ -676,34 +682,36 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
 	if ((pmemtarg == NULL) || (bInterlace >= 2))
 		ZZLog::Error_Log("CRCR Check for memory shader fault.");

+	//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
+		
 	SetShaderCaller("RenderCheckForMemory");

 	SetTexVariablesInt(0, g_bCRTCBilinear ? 2 : 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
-	cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
-	cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
-
 	if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
-
-	// finally render from the memory (note that the stencil buffer will keep previous regions)
-	Vector v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
-
+	Vector v;
+	
 	// Fixme: Why is this here?
 	// We should probably call RenderSetTargetBitTex instead.
 	if (g_bCRTCBilinear)
-		ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(texframe.tw, texframe.th, -0.5f, -0.5f), "g_fBitBltTex");
+		v = RenderSetTargetBitTex(texframe.tw, texframe.th, -0.5f, -0.5f, INTERLACE_COUNT);
 	else
-		ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th), "g_fBitBltTex");
+		v = RenderSetTargetBitTex(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th, INTERLACE_COUNT);

+	// finally render from the memory (note that the stencil buffer will keep previous regions)
+	v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
+	
 	v = RenderSetTargetBitTrans(texframe.th);

 	v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);

 	Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);

+	cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
+	cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
 	RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);

 	SETPIXELSHADER(ppsCRTC[bInterlace].prog);
-
+	GL_REPORT_ERRORD();
 	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 }

@ -909,7 +917,6 @@ void ZeroGS::RenderCRTC(int interlace)
 	// start from the last circuit
 	for (int i = !PMODE->SLBG; i >= 0; --i)
 	{
-
 		tex0Info& texframe = dispinfo[i];

 		if (texframe.th <= 1) continue;
@ -928,7 +935,7 @@ void ZeroGS::RenderCRTC(int interlace)

 		// if we could not draw image from target's do it from memory
 		if (!RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace))
-			RenderCheckForMemory(texframe, listTargs, interlace, bInterlace);
+			RenderCheckForMemory(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
 	}

 	GL_REPORT_ERRORD();
--- a/plugins/zzogl-pg/opengl/ZZoglVB.cpp
+++ b/plugins/zzogl-pg/opengl/ZZoglVB.cpp
@ -337,7 +337,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
 }

 // After frame resetting, it is possible that 16 to 32 or 32 to 16 (color bits) conversion should be made.
-inline void ZeroGS::VB::CheckFrame16vs32Convesion()
+inline void ZeroGS::VB::CheckFrame16vs32Conversion()
 {
 	if (prndr->status & CRenderTarget::TS_NeedConvert32)
 	{
@ -393,7 +393,7 @@ void ZeroGS::VB::CheckFrame(int tbp)

 		bChanged = CheckFrameResolveRender(tbp);

-		CheckFrame16vs32Convesion();
+		CheckFrame16vs32Conversion();
 	}
 	else if (bNeedZCheck)
 	{
--- a/plugins/zzogl-pg/opengl/rasterfont.h
+++ b/plugins/zzogl-pg/opengl/rasterfont.h
@ -1,22 +1,24 @@
 #ifndef RasterFont_Header
 #define RasterFont_Header

-class RasterFont {
-protected:
-	int	fontOffset;
+class RasterFont
+{

-public:
-	RasterFont();
-	~RasterFont(void);
-	static int debug;
+	protected:
+		int	fontOffset;

-	// some useful constants
-	enum	{char_width = 10};
-	enum	{char_height = 15};
+	public:
+		RasterFont();
+		~RasterFont(void);
+		static int debug;

-	// and the happy helper functions
-	void printString(const char *s, double x, double y, double z=0.0);
-	void printCenteredString(const char *s, double y, int screen_width, double z=0.0);
+		// some useful constants
+		enum	{char_width = 10};
+		enum	{char_height = 15};
+
+		// and the happy helper functions
+		void printString(const char *s, double x, double y, double z = 0.0);
+		void printCenteredString(const char *s, double y, int screen_width, double z = 0.0);
 };

 #endif
--- a/plugins/zzogl-pg/opengl/targets.h
+++ b/plugins/zzogl-pg/opengl/targets.h
@ -22,23 +22,27 @@
 #define TARGET_VIRTUAL_KEY 0x80000000
 #include "PS2Edefs.h"

-inline Vector DefaultOneColor( FRAGMENTSHADER ptr ) {
-	Vector v = Vector ( 1, 1, 1, 1 );
-	cgGLSetParameter4fv( ptr.sOneColor, v);
+inline Vector DefaultOneColor(FRAGMENTSHADER ptr)
+{
+	Vector v = Vector(1, 1, 1, 1);
+	cgGLSetParameter4fv(ptr.sOneColor, v);
 	return v ;
 }

-namespace ZeroGS {
+namespace ZeroGS
+{

-	inline u32 GetFrameKey (int fbp, int fbw, VB& curvb);
+inline u32 GetFrameKey(int fbp, int fbw, VB& curvb);

-	// manages render targets
-	class CRenderTargetMngr
-	{
+// manages render targets
+
+class CRenderTargetMngr
+{
 	public:
 		typedef map<u32, CRenderTarget*> MAPTARGETS;

-		enum TargetOptions {
+		enum TargetOptions
+		{
 			TO_DepthBuffer = 1,
 			TO_StrictHeight = 2, // height returned has to be the same as requested
 			TO_Virtual = 4
@ -50,16 +54,17 @@ namespace ZeroGS {
 		static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);

 		CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
-		inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) {
-			MAPTARGETS::iterator it = mapTargets.find (GetFrameKey(fbp, fbw, curvb));
+		inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
+		{
+			MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb));

-/*			if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
-			{
-				printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
-				printf("%x %x\n", fbp, fbw);
-				for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
-					printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
-			}*/
+			/*			if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
+						{
+							printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
+							printf("%x %x\n", fbp, fbw);
+							for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
+								printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
+						}*/
 			return it != mapTargets.end() ? it->second : NULL;
 		}

@ -68,8 +73,9 @@ namespace ZeroGS {

 		// resolves all targets within a range
 		__forceinline void Resolve(int start, int end);
-		__forceinline void ResolveAll() {
-			for(MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it )
+		__forceinline void ResolveAll()
+		{
+			for (MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it)
 				it->second->Resolve();
 		}

@ -77,12 +83,13 @@ namespace ZeroGS {
 		void DestroyIntersecting(CRenderTarget* prndr);

 		// promotes a target from virtual to real
-		inline CRenderTarget* Promote(u32 key) {
-			assert( !(key & TARGET_VIRTUAL_KEY) );
+		inline CRenderTarget* Promote(u32 key)
+		{
+			assert(!(key & TARGET_VIRTUAL_KEY));

 			// promote to regular targ
-			CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key|TARGET_VIRTUAL_KEY);
-			assert( it != mapTargets.end() );
+			CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key | TARGET_VIRTUAL_KEY);
+			assert(it != mapTargets.end());

 			CRenderTarget* ptarg = it->second;
 			mapTargets.erase(it);
@ -90,31 +97,36 @@ namespace ZeroGS {
 			DestroyIntersecting(ptarg);

 			it = mapTargets.find(key);
-			if( it != mapTargets.end() ) {
+
+			if (it != mapTargets.end())
+			{
 				DestroyTarg(it->second);
 				it->second = ptarg;
 			}
 			else
 				mapTargets[key] = ptarg;

-		        if( g_GameSettings & GAME_RESOLVEPROMOTED )
-                		ptarg->status = CRenderTarget::TS_Resolved;
-				else
-			ptarg->status = CRenderTarget::TS_NeedUpdate;
-				return ptarg;
+			if (g_GameSettings & GAME_RESOLVEPROMOTED)
+				ptarg->status = CRenderTarget::TS_Resolved;
+			else
+				ptarg->status = CRenderTarget::TS_NeedUpdate;
+
+			return ptarg;
 		}

 		static void DestroyTarg(CRenderTarget* ptarg);

 		MAPTARGETS mapTargets, mapDummyTargs;
-	};
+};
+
+class CMemoryTargetMngr
+{

-	class CMemoryTargetMngr
-	{
 	public:
 		CMemoryTargetMngr() : curstamp(0) {}
+
 		CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
-		CMemoryTarget* MemoryTarget_SearchExistTarget (int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
+		CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
 		CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);

 		void Destroy(); // destroy all targs
@ -122,26 +134,28 @@ namespace ZeroGS {
 		void ClearRange(int starty, int endy); // set all targets to cleared
 		void DestroyCleared(); // flush all cleared targes
 		void DestroyOldest();
-		
+
 		list<CMemoryTarget> listTargets, listClearedTargets;
 		u32 curstamp;

 	private:
 		list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
-	};
+};

-	class CBitwiseTextureMngr
-	{
+class CBitwiseTextureMngr
+{
 	public:
 		~CBitwiseTextureMngr() { Destroy(); }

 		void Destroy();

 		// since GetTex can delete textures to free up mem, it is dangerous if using that texture, so specify at least one other tex to save
-		__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) {
+		__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete)
+		{
 			map<u32, u32>::iterator it = mapTextures.find(bitvalue);
-			if( it != mapTextures.end() )
-				return it->second;
+
+			if (it != mapTextures.end()) return it->second;
+
 			return GetTexInt(bitvalue, ptexDoNotDelete);
 		}

@ -149,99 +163,115 @@ namespace ZeroGS {
 		u32 GetTexInt(u32 bitvalue, u32 ptexDoNotDelete);

 		map<u32, u32> mapTextures;
-	};
+};

-	// manages
-	class CRangeManager
-	{
+// manages a list of [start, end) ranges kept in ascending order, non-intersecting
+
+class CRangeManager
+{
 	public:
-		CRangeManager() {
+		CRangeManager()
+		{
 			ranges.reserve(16);
 		}

 		// [start, end)
-		struct RANGE {
+
+		struct RANGE
+		{
 			RANGE() {}
+
 			inline RANGE(int start, int end) : start(start), end(end) {}
+
 			int start, end;
 		};

 		// works in semi logN
 		void Insert(int start, int end);
 		void RangeSanityCheck();
-		inline void Clear() {
+		inline void Clear()
+		{
 			ranges.resize(0);
 		}

 		vector<RANGE> ranges; // organized in ascending order, non-intersecting
-	};
+};

-	extern CRenderTargetMngr s_RTs, s_DepthRTs;
-	extern CBitwiseTextureMngr s_BitwiseTextures;
-	extern CMemoryTargetMngr g_MemTargs;
+extern CRenderTargetMngr s_RTs, s_DepthRTs;
+extern CBitwiseTextureMngr s_BitwiseTextures;
+extern CMemoryTargetMngr g_MemTargs;

-	extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
+extern u8 s_AAx, s_AAy, s_AAz, s_AAw;

-	// Real rendered width, depends on AA and AAneg.
-	inline int RW(int tbw) {
-		if (s_AAx >= s_AAz)
-			return (tbw << ( s_AAx - s_AAz ));
-		else
-			return (tbw >> ( s_AAz - s_AAx ));
-	}
+// Real rendered width, depends on AA and AAneg.
+inline int RW(int tbw)
+{
+	if (s_AAx >= s_AAz)
+		return (tbw << (s_AAx - s_AAz));
+	else
+		return (tbw >> (s_AAz - s_AAx));
+}

-	// Real rendered height, depends on AA and AAneg.
-	inline int RH(int tbh) {
-		if (s_AAy >= s_AAw)
-			return (tbh << ( s_AAy - s_AAw ));
-		else
-			return (tbh >> ( s_AAw - s_AAy ));
-	}
+// Real rendered height, depends on AA and AAneg.
+inline int RH(int tbh)
+{
+	if (s_AAy >= s_AAw)
+		return (tbh << (s_AAy - s_AAw));
+	else
+		return (tbh >> (s_AAw - s_AAy));
+}

 /*	inline void CreateTargetsList(int start, int end, list<ZeroGS::CRenderTarget*>& listTargs) {
 		s_DepthRTs.GetTargs(start, end, listTargs);
 		s_RTs.GetTargs(start, end, listTargs);
 	}*/

-	// This pattern of functions is called 3 times, so I add creating Targets list into one.
-	inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end) {
-		list<ZeroGS::CRenderTarget*> listTargs;
-		s_DepthRTs.GetTargs(start, end, listTargs);
-		s_RTs.GetTargs(start, end, listTargs);
-		return listTargs;
-	}
+// This pair of GetTargs calls is needed in 3 places, so building the targets list is gathered into one function.
+inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
+{
+	list<ZeroGS::CRenderTarget*> listTargs;
+	s_DepthRTs.GetTargs(start, end, listTargs);
+	s_RTs.GetTargs(start, end, listTargs);
+	return listTargs;
+}

-	extern Vector g_vdepth;
-	extern int icurctx;
+extern Vector g_vdepth;
+extern int icurctx;

-	extern VERTEXSHADER pvsBitBlt;
-	extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
-	extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
-	extern GLuint vboRect;
+extern VERTEXSHADER pvsBitBlt;
+extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
+extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
+extern GLuint vboRect;

 // Unworking
 #define PSMPOSITION 28

 // Code width and height of frame into key, that used in targetmanager
 // This is 3 variants of one function, Key dependant on fbp and fbw.
-inline u32 GetFrameKey (const frameInfo& frame) {
+inline u32 GetFrameKey(const frameInfo& frame)
+{
 	return (((frame.fbw) << 16) | (frame.fbp));
 }
-inline u32 GetFrameKey ( CRenderTarget* frame ) {
+
+inline u32 GetFrameKey(CRenderTarget* frame)
+{
 	return (((frame->fbw) << 16) | (frame->fbp));
 }

-inline u32 GetFrameKey (int fbp, int fbw,  VB& curvb) {
+inline u32 GetFrameKey(int fbp, int fbw,  VB& curvb)
+{
 	return (((fbw) << 16) | (fbp));
 }

-inline u16 ShiftHeight (int fbh, int fbp, int fbhCalc) {
+inline u16 ShiftHeight(int fbh, int fbp, int fbhCalc)
+{
 	return fbh;
 }

-//FIXME: this code for P4 ad KH1. It should not be such strange!
+//FIXME: this code is for P4 and KH1. It should not be so strange!
 //Dummy targets was deleted from mapTargets, but not erased.
-inline u32 GetFrameKeyDummy (const frameInfo& frame) {
+inline u32 GetFrameKeyDummy(const frameInfo& frame)
+{
 //	if (frame.fbp > 0x2000 && ZZOgl_fbh_Calc(frame) < 0x400 && ZZOgl_fbh_Calc(frame) != frame.fbh)
 //		printf ("Z %x %x %x %x\n", frame.fbh, frame.fbhCalc, frame.fbp, ZZOgl_fbh_Calc(frame));
 	// height over 1024 would shrink to 1024, so dummy targets with calculated size more than 0x400 should be
@ -252,7 +282,8 @@ inline u32 GetFrameKeyDummy (const frameInfo& frame) {
 		return (((frame.fbw) << 16) | frame.fbh);
 }

-inline u32 GetFrameKeyDummy ( CRenderTarget* frame ) {
+inline u32 GetFrameKeyDummy(CRenderTarget* frame)
+{
 	if (/*frame->fbp > 0x2000 && */ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm) < 0x300)
 		return (((frame->fbw) << 16) | ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm));
 	else
--- a/plugins/zzogl-pg/opengl/x86.h
+++ b/plugins/zzogl-pg/opengl/x86.h
@ -106,7 +106,7 @@ extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);
 extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut);
 extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut);

-extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters );
+extern void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters);

 #ifdef ZEROGS_SSE2

--- a/plugins/zzogl-pg/opengl/zerogs.h
+++ b/plugins/zzogl-pg/opengl/zerogs.h
--- a/plugins/zzogl-pg/opengl/zerogsmath.h
+++ b/plugins/zzogl-pg/opengl/zerogsmath.h
@ -17,6 +17,7 @@
 template <class T> inline T RAD_2_DEG(T radians)  { return (radians * (T)57.29577951); }

 class Transform;
+
 class TransformMatrix;

 typedef float dReal;
@ -35,63 +36,57 @@ inline dReal* inv4(const dReal* pf, dReal* pfres);

 // class used for 3 and 4 dim vectors and quaternions
 // It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
+
 class Vector
 {
-public:
-	dReal x, y, z, w;
+	public:
+		dReal x, y, z, w;

-	Vector() : x(0), y(0), z(0), w(0) {}
-	Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
-	Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
-	Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
-	Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
+		Vector() : x(0), y(0), z(0), w(0) {}
+		Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
+		Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
+		Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
+		Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
+		dReal  operator[](int i) const	   { return (&x)[i]; }
+		dReal& operator[](int i)			 { return (&x)[i]; }
+		
+		// casting operators
+		operator dReal*() { return &x; }
+		operator const dReal*() const { return (const dReal*)&x; }
+		
+		// SCALAR FUNCTIONS
+		inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
+		inline void normalize() { normalize4(&x, &x); }
+		inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
+		inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
+		inline void SetColor(u32 color)
+		{
+			x = (color & 0xff) / 255.0f;
+			y = ((color >> 8) & 0xff) / 255.0f;
+			z = ((color >> 16) & 0xff) / 255.0f;
+		}

-	dReal  operator[](int i) const	   { return (&x)[i]; }
-	dReal& operator[](int i)			 { return (&x)[i]; }
-
-	// casting operators
-	operator dReal* () { return &x; }
-	operator const dReal* () const { return (const dReal*)&x; }
-
-	// SCALAR FUNCTIONS
-	inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
-	inline void normalize() { normalize4(&x, &x); }
-
-	inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
-	inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
-	inline void SetColor(u32 color) 
-	{
-		x = (color & 0xff) / 255.0f;
-		y = ((color >> 8) & 0xff) / 255.0f;
-		z = ((color >> 16) & 0xff) / 255.0f;
-	}
-
-	// 3 dim cross product, w is not touched
-	/// this = this x v
-	inline void Cross(const Vector &v) { cross3(&x, &x, v); }
-
-	/// this = u x v
-	inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
-
-	inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
-	inline Vector operator+(const Vector &r) const { Vector v; v.x = x+r.x; v.y = y+r.y; v.z = z+r.z; v.w = w+r.w; return v; }
-	inline Vector operator-(const Vector &r) const { Vector v; v.x = x-r.x; v.y = y-r.y; v.z = z-r.z; v.w = w-r.w; return v; }
-	inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x*x; v.y = r.y*y; v.z = r.z*z; v.w = r.w*w; return v; }
-	inline Vector operator*(dReal k) const { Vector v; v.x = k*x; v.y = k*y; v.z = k*z; v.w = k*w; return v; }
-
-	inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
-	inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
-	inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
-
-	inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
-	inline Vector& operator /= (const dReal _k) { dReal k=1/_k; x *= k; y *= k; z *= k; w *= k; return *this; }
-
-	friend Vector operator* (float f, const Vector& v);
-	//friend ostream& operator<<(ostream& O, const Vector& v);
-	//friend istream& operator>>(istream& I, Vector& v);
+		// 3 dim cross product, w is not touched
+		/// this = this x v
+		inline void Cross(const Vector &v) { cross3(&x, &x, v); }
+		/// this = u x v
+		inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
+		inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
+		inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
+		inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
+		inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
+		inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
+		inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
+		inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
+		inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
+		inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
+		inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
+		friend Vector operator*(float f, const Vector& v);
+		//friend ostream& operator<<(ostream& O, const Vector& v);
+		//friend istream& operator>>(istream& I, Vector& v);
 };

-inline Vector operator* (float f, const Vector& left)
+inline Vector operator*(float f, const Vector& left)
 {
 	Vector v;
 	v.x = f * left.x;
@ -113,18 +108,22 @@ struct OBB
 struct TRIANGLE
 {
 	TRIANGLE() {}
+
 	TRIANGLE(const Vector& v1, const Vector& v2, const Vector& v3) : v1(v1), v2(v2), v3(v3) {}
+
 	~TRIANGLE() {}

 	Vector v1, v2, v3;	  //!< the vertices of the triangle

 	const Vector& operator[](int i) const { return (&v1)[i]; }
-	Vector&	   operator[](int i)	   { return (&v1)[i]; }
+
+	Vector& operator[](int i) { return (&v1)[i]; }

 	/// assumes CCW ordering of vertices
-	inline Vector ComputeNormal() {
+	inline Vector ComputeNormal()
+	{
 		Vector normal;
-		cross3(normal, v2-v1, v3-v1);
+		cross3(normal, v2 - v1, v3 - v1);
 		return normal;
 	}
 };
@ -172,8 +171,8 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf);
 inline bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2x, dReal& fv2y);

 // Simple routines for linear algebra algorithms //
-int CubicRoots (double c0, double c1, double c2, double *r0, double *r1, double *r2);
-bool QLAlgorithm3 (dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag);
+int CubicRoots(double c0, double c1, double c2, double *r0, double *r1, double *r2);
+bool QLAlgorithm3(dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag);

 void EigenSymmetric3(dReal* fCovariance, dReal* eval, dReal* fAxes);

@ -182,7 +181,7 @@ void GetCovarBasisVectors(dReal fCovariance[3][3], Vector* vRight, Vector* vUp,
 // first root returned is always >= second, roots are defined if the quadratic doesn't have real solutions
 void QuadraticSolver(dReal* pfQuadratic, dReal* pfRoots);

-int insideQuadrilateral(const Vector* p0,const Vector* p1, const Vector* p2,const Vector* p3);
+int insideQuadrilateral(const Vector* p0, const Vector* p1, const Vector* p2, const Vector* p3);
 int insideTriangle(const Vector* p0, const Vector* p1, const Vector* p2);

 // multiplies a matrix by a scalar
@ -238,30 +237,48 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
 	c = pfmat[0] * pfmat[3] - pfmat[1] * pfmat[2];
 	d = b * b - 4.0f * c + 1e-16f;

-	if( d < 0 ) return false;
-	if( d < 1e-16f ) {
+	if (d < 0) return false;
+
+	if (d < 1e-16f)
+	{
 		a = -0.5f * b;
-		peigs[0] = a;	peigs[1] = a;
-		fv1x = pfmat[1];		fv1y = a - pfmat[0];
-		c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y);
-		fv1x *= c;		fv1y *= c;
-		fv2x = -fv1y;		fv2y = fv1x;
+		peigs[0] = a;
+		peigs[1] = a;
+		fv1x = pfmat[1];
+		fv1y = a - pfmat[0];
+		c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
+		fv1x *= c;
+		fv1y *= c;
+		fv2x = -fv1y;
+		fv2y = fv1x;
 		return true;
 	}

 	// two roots
 	d = sqrtf(d);
+
 	a = -0.5f * (b + d);
 	peigs[0] = a;
-	fv1x = pfmat[1];		fv1y = a-pfmat[0];
-	c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y);
-	fv1x *= c;		fv1y *= c;
+
+	fv1x = pfmat[1];
+	fv1y = a - pfmat[0];
+
+	c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
+
+	fv1x *= c;
+	fv1y *= c;

 	a += d;
 	peigs[1] = a;
-	fv2x = pfmat[1];		fv2y = a-pfmat[0];
-	c = 1 / sqrtf(fv2x*fv2x + fv2y*fv2y);
-	fv2x *= c;		fv2y *= c;
+
+	fv2x = pfmat[1];
+	fv2y = a - pfmat[0];
+
+	c = 1 / sqrtf(fv2x * fv2x + fv2y * fv2y);
+
+	fv2x *= c;
+	fv2y *= c;
+
 	return true;
 }

@ -270,62 +287,70 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
 // Functions that are replacable by ipp library funcs
 template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
 {
-	assert( pf1 != NULL && pf2 != NULL && pfres != NULL );
+	assert(pf1 != NULL && pf2 != NULL && pfres != NULL);

 	T* pfres2;
-	if( pfres == pf1 || pfres == pf2 ) pfres2 = (T*)alloca(9 * sizeof(T));
-	else pfres2 = pfres;

-	pfres2[0*4+0] = pf1[0*4+0]*pf2[0*4+0]+pf1[0*4+1]*pf2[1*4+0]+pf1[0*4+2]*pf2[2*4+0];
-	pfres2[0*4+1] = pf1[0*4+0]*pf2[0*4+1]+pf1[0*4+1]*pf2[1*4+1]+pf1[0*4+2]*pf2[2*4+1];
-	pfres2[0*4+2] = pf1[0*4+0]*pf2[0*4+2]+pf1[0*4+1]*pf2[1*4+2]+pf1[0*4+2]*pf2[2*4+2];
+	if (pfres == pf1 || pfres == pf2) 
+		pfres2 = (T*)alloca(9 * sizeof(T));
+	else 
+		pfres2 = pfres;

-	pfres2[1*4+0] = pf1[1*4+0]*pf2[0*4+0]+pf1[1*4+1]*pf2[1*4+0]+pf1[1*4+2]*pf2[2*4+0];
-	pfres2[1*4+1] = pf1[1*4+0]*pf2[0*4+1]+pf1[1*4+1]*pf2[1*4+1]+pf1[1*4+2]*pf2[2*4+1];
-	pfres2[1*4+2] = pf1[1*4+0]*pf2[0*4+2]+pf1[1*4+1]*pf2[1*4+2]+pf1[1*4+2]*pf2[2*4+2];
+	pfres2[0*4+0] = pf1[0*4+0] * pf2[0*4+0] + pf1[0*4+1] * pf2[1*4+0] + pf1[0*4+2] * pf2[2*4+0];
+	pfres2[0*4+1] = pf1[0*4+0] * pf2[0*4+1] + pf1[0*4+1] * pf2[1*4+1] + pf1[0*4+2] * pf2[2*4+1];
+	pfres2[0*4+2] = pf1[0*4+0] * pf2[0*4+2] + pf1[0*4+1] * pf2[1*4+2] + pf1[0*4+2] * pf2[2*4+2];
+	
+	pfres2[1*4+0] = pf1[1*4+0] * pf2[0*4+0] + pf1[1*4+1] * pf2[1*4+0] + pf1[1*4+2] * pf2[2*4+0];
+	pfres2[1*4+1] = pf1[1*4+0] * pf2[0*4+1] + pf1[1*4+1] * pf2[1*4+1] + pf1[1*4+2] * pf2[2*4+1];
+	pfres2[1*4+2] = pf1[1*4+0] * pf2[0*4+2] + pf1[1*4+1] * pf2[1*4+2] + pf1[1*4+2] * pf2[2*4+2];
+	
+	pfres2[2*4+0] = pf1[2*4+0] * pf2[0*4+0] + pf1[2*4+1] * pf2[1*4+0] + pf1[2*4+2] * pf2[2*4+0];
+	pfres2[2*4+1] = pf1[2*4+0] * pf2[0*4+1] + pf1[2*4+1] * pf2[1*4+1] + pf1[2*4+2] * pf2[2*4+1];
+	pfres2[2*4+2] = pf1[2*4+0] * pf2[0*4+2] + pf1[2*4+1] * pf2[1*4+2] + pf1[2*4+2] * pf2[2*4+2];

-	pfres2[2*4+0] = pf1[2*4+0]*pf2[0*4+0]+pf1[2*4+1]*pf2[1*4+0]+pf1[2*4+2]*pf2[2*4+0];
-	pfres2[2*4+1] = pf1[2*4+0]*pf2[0*4+1]+pf1[2*4+1]*pf2[1*4+1]+pf1[2*4+2]*pf2[2*4+1];
-	pfres2[2*4+2] = pf1[2*4+0]*pf2[0*4+2]+pf1[2*4+1]*pf2[1*4+2]+pf1[2*4+2]*pf2[2*4+2];
-
-	if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
+	if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T));

 	return pfres;
 }

 inline dReal* mult3(dReal* pfres, const dReal* pf1, const dReal* pf2) { return _mult3<dReal>(pfres, pf1, pf2); }
+
 inline double* mult3(double* pfres, const double* pf1, const double* pf2) { return _mult3<double>(pfres, pf1, pf2); }

 template <class T>
 inline T* _mult4(T* pfres, const T* p1, const T* p2)
 {
-	assert( pfres != NULL && p1 != NULL && p2 != NULL );
+	assert(pfres != NULL && p1 != NULL && p2 != NULL);

 	T* pfres2;
-	if( pfres == p1 || pfres == p2 ) pfres2 = (T*)alloca(16 * sizeof(T));
-	else pfres2 = pfres;

-	pfres2[0*4+0] = p1[0*4+0]*p2[0*4+0] + p1[0*4+1]*p2[1*4+0] + p1[0*4+2]*p2[2*4+0] + p1[0*4+3]*p2[3*4+0];
-	pfres2[0*4+1] = p1[0*4+0]*p2[0*4+1] + p1[0*4+1]*p2[1*4+1] + p1[0*4+2]*p2[2*4+1] + p1[0*4+3]*p2[3*4+1];
-	pfres2[0*4+2] = p1[0*4+0]*p2[0*4+2] + p1[0*4+1]*p2[1*4+2] + p1[0*4+2]*p2[2*4+2] + p1[0*4+3]*p2[3*4+2];
-	pfres2[0*4+3] = p1[0*4+0]*p2[0*4+3] + p1[0*4+1]*p2[1*4+3] + p1[0*4+2]*p2[2*4+3] + p1[0*4+3]*p2[3*4+3];
+	if (pfres == p1 || pfres == p2) 
+		pfres2 = (T*)alloca(16 * sizeof(T));
+	else 
+		pfres2 = pfres;

-	pfres2[1*4+0] = p1[1*4+0]*p2[0*4+0] + p1[1*4+1]*p2[1*4+0] + p1[1*4+2]*p2[2*4+0] + p1[1*4+3]*p2[3*4+0];
-	pfres2[1*4+1] = p1[1*4+0]*p2[0*4+1] + p1[1*4+1]*p2[1*4+1] + p1[1*4+2]*p2[2*4+1] + p1[1*4+3]*p2[3*4+1];
-	pfres2[1*4+2] = p1[1*4+0]*p2[0*4+2] + p1[1*4+1]*p2[1*4+2] + p1[1*4+2]*p2[2*4+2] + p1[1*4+3]*p2[3*4+2];
-	pfres2[1*4+3] = p1[1*4+0]*p2[0*4+3] + p1[1*4+1]*p2[1*4+3] + p1[1*4+2]*p2[2*4+3] + p1[1*4+3]*p2[3*4+3];
+	pfres2[0*4+0] = p1[0*4+0] * p2[0*4+0] + p1[0*4+1] * p2[1*4+0] + p1[0*4+2] * p2[2*4+0] + p1[0*4+3] * p2[3*4+0];
+	pfres2[0*4+1] = p1[0*4+0] * p2[0*4+1] + p1[0*4+1] * p2[1*4+1] + p1[0*4+2] * p2[2*4+1] + p1[0*4+3] * p2[3*4+1];
+	pfres2[0*4+2] = p1[0*4+0] * p2[0*4+2] + p1[0*4+1] * p2[1*4+2] + p1[0*4+2] * p2[2*4+2] + p1[0*4+3] * p2[3*4+2];
+	pfres2[0*4+3] = p1[0*4+0] * p2[0*4+3] + p1[0*4+1] * p2[1*4+3] + p1[0*4+2] * p2[2*4+3] + p1[0*4+3] * p2[3*4+3];

-	pfres2[2*4+0] = p1[2*4+0]*p2[0*4+0] + p1[2*4+1]*p2[1*4+0] + p1[2*4+2]*p2[2*4+0] + p1[2*4+3]*p2[3*4+0];
-	pfres2[2*4+1] = p1[2*4+0]*p2[0*4+1] + p1[2*4+1]*p2[1*4+1] + p1[2*4+2]*p2[2*4+1] + p1[2*4+3]*p2[3*4+1];
-	pfres2[2*4+2] = p1[2*4+0]*p2[0*4+2] + p1[2*4+1]*p2[1*4+2] + p1[2*4+2]*p2[2*4+2] + p1[2*4+3]*p2[3*4+2];
-	pfres2[2*4+3] = p1[2*4+0]*p2[0*4+3] + p1[2*4+1]*p2[1*4+3] + p1[2*4+2]*p2[2*4+3] + p1[2*4+3]*p2[3*4+3];
+	pfres2[1*4+0] = p1[1*4+0] * p2[0*4+0] + p1[1*4+1] * p2[1*4+0] + p1[1*4+2] * p2[2*4+0] + p1[1*4+3] * p2[3*4+0];
+	pfres2[1*4+1] = p1[1*4+0] * p2[0*4+1] + p1[1*4+1] * p2[1*4+1] + p1[1*4+2] * p2[2*4+1] + p1[1*4+3] * p2[3*4+1];
+	pfres2[1*4+2] = p1[1*4+0] * p2[0*4+2] + p1[1*4+1] * p2[1*4+2] + p1[1*4+2] * p2[2*4+2] + p1[1*4+3] * p2[3*4+2];
+	pfres2[1*4+3] = p1[1*4+0] * p2[0*4+3] + p1[1*4+1] * p2[1*4+3] + p1[1*4+2] * p2[2*4+3] + p1[1*4+3] * p2[3*4+3];
+	
+	pfres2[2*4+0] = p1[2*4+0] * p2[0*4+0] + p1[2*4+1] * p2[1*4+0] + p1[2*4+2] * p2[2*4+0] + p1[2*4+3] * p2[3*4+0];
+	pfres2[2*4+1] = p1[2*4+0] * p2[0*4+1] + p1[2*4+1] * p2[1*4+1] + p1[2*4+2] * p2[2*4+1] + p1[2*4+3] * p2[3*4+1];
+	pfres2[2*4+2] = p1[2*4+0] * p2[0*4+2] + p1[2*4+1] * p2[1*4+2] + p1[2*4+2] * p2[2*4+2] + p1[2*4+3] * p2[3*4+2];
+	pfres2[2*4+3] = p1[2*4+0] * p2[0*4+3] + p1[2*4+1] * p2[1*4+3] + p1[2*4+2] * p2[2*4+3] + p1[2*4+3] * p2[3*4+3];
+	
+	pfres2[3*4+0] = p1[3*4+0] * p2[0*4+0] + p1[3*4+1] * p2[1*4+0] + p1[3*4+2] * p2[2*4+0] + p1[3*4+3] * p2[3*4+0];
+	pfres2[3*4+1] = p1[3*4+0] * p2[0*4+1] + p1[3*4+1] * p2[1*4+1] + p1[3*4+2] * p2[2*4+1] + p1[3*4+3] * p2[3*4+1];
+	pfres2[3*4+2] = p1[3*4+0] * p2[0*4+2] + p1[3*4+1] * p2[1*4+2] + p1[3*4+2] * p2[2*4+2] + p1[3*4+3] * p2[3*4+2];
+	pfres2[3*4+3] = p1[3*4+0] * p2[0*4+3] + p1[3*4+1] * p2[1*4+3] + p1[3*4+2] * p2[2*4+3] + p1[3*4+3] * p2[3*4+3];

-	pfres2[3*4+0] = p1[3*4+0]*p2[0*4+0] + p1[3*4+1]*p2[1*4+0] + p1[3*4+2]*p2[2*4+0] + p1[3*4+3]*p2[3*4+0];
-	pfres2[3*4+1] = p1[3*4+0]*p2[0*4+1] + p1[3*4+1]*p2[1*4+1] + p1[3*4+2]*p2[2*4+1] + p1[3*4+3]*p2[3*4+1];
-	pfres2[3*4+2] = p1[3*4+0]*p2[0*4+2] + p1[3*4+1]*p2[1*4+2] + p1[3*4+2]*p2[2*4+2] + p1[3*4+3]*p2[3*4+2];
-	pfres2[3*4+3] = p1[3*4+0]*p2[0*4+3] + p1[3*4+1]*p2[1*4+3] + p1[3*4+2]*p2[2*4+3] + p1[3*4+3]*p2[3*4+3];
+	if (pfres != pfres2) memcpy(pfres, pfres2, sizeof(T)*16);

-	if( pfres != pfres2 ) memcpy(pfres, pfres2, sizeof(T)*16);
 	return pfres;
 }

@ -336,22 +361,23 @@ template <class T>
 inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2)
 {
 	T* pfres2;
-	if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T));
-	else pfres2 = pfres;

-	pfres2[0] = pf1[0]*pf2[0]+pf1[3]*pf2[3]+pf1[6]*pf2[6];
-	pfres2[1] = pf1[0]*pf2[1]+pf1[3]*pf2[4]+pf1[6]*pf2[7];
-	pfres2[2] = pf1[0]*pf2[2]+pf1[3]*pf2[5]+pf1[6]*pf2[8];
+	if (pfres == pf1) 
+		pfres2 = (T*)alloca(9 * sizeof(T));
+	else 
+		pfres2 = pfres;

-	pfres2[3] = pf1[1]*pf2[0]+pf1[4]*pf2[3]+pf1[7]*pf2[6];
-	pfres2[4] = pf1[1]*pf2[1]+pf1[4]*pf2[4]+pf1[7]*pf2[7];
-	pfres2[5] = pf1[1]*pf2[2]+pf1[4]*pf2[5]+pf1[7]*pf2[8];
+	pfres2[0] = pf1[0] * pf2[0] + pf1[3] * pf2[3] + pf1[6] * pf2[6];
+	pfres2[1] = pf1[0] * pf2[1] + pf1[3] * pf2[4] + pf1[6] * pf2[7];
+	pfres2[2] = pf1[0] * pf2[2] + pf1[3] * pf2[5] + pf1[6] * pf2[8];
+	pfres2[3] = pf1[1] * pf2[0] + pf1[4] * pf2[3] + pf1[7] * pf2[6];
+	pfres2[4] = pf1[1] * pf2[1] + pf1[4] * pf2[4] + pf1[7] * pf2[7];
+	pfres2[5] = pf1[1] * pf2[2] + pf1[4] * pf2[5] + pf1[7] * pf2[8];
+	pfres2[6] = pf1[2] * pf2[0] + pf1[5] * pf2[3] + pf1[8] * pf2[6];
+	pfres2[7] = pf1[2] * pf2[1] + pf1[5] * pf2[4] + pf1[8] * pf2[7];
+	pfres2[8] = pf1[2] * pf2[2] + pf1[5] * pf2[5] + pf1[8] * pf2[8];

-	pfres2[6] = pf1[2]*pf2[0]+pf1[5]*pf2[3]+pf1[8]*pf2[6];
-	pfres2[7] = pf1[2]*pf2[1]+pf1[5]*pf2[4]+pf1[8]*pf2[7];
-	pfres2[8] = pf1[2]*pf2[2]+pf1[5]*pf2[5]+pf1[8]*pf2[8];
-
-	if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
+	if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T));

 	return pfres;
 }
@ -360,11 +386,16 @@ template <class T>
 inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2)
 {
 	T* pfres2;
-	if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T));
-	else pfres2 = pfres;

-	for(int i = 0; i < 4; ++i) {
-		for(int j = 0; j < 4; ++j) {
+	if (pfres == pf1) 
+		pfres2 = (T*)alloca(16 * sizeof(T));
+	else 
+		pfres2 = pfres;
+
+	for (int i = 0; i < 4; ++i)
+	{
+		for (int j = 0; j < 4; ++j)
+		{
 			pfres[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12];
 		}
 	}
@ -381,8 +412,11 @@ inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) {
 template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
 {
 	T* pfres2;
-	if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T));
-	else pfres2 = pfres;
+
+	if (pfres == pf) 
+		pfres2 = (T*)alloca(3 * stride * sizeof(T));
+	else 
+		pfres2 = pfres;

 	// inverse = C^t / det(pf) where C is the matrix of coefficients

@ -390,29 +424,40 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
 	pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1];
 	pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2];
 	pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1];
+
 	pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2];
 	pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0];
 	pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2];
+
 	pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0];
 	pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1];
 	pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0];

 	T fdet = pf[0*stride + 2] * pfres2[2*stride + 0] + pf[1*stride + 2] * pfres2[2*stride + 1] +
-		pf[2*stride + 2] * pfres2[2*stride + 2];
+			 pf[2*stride + 2] * pfres2[2*stride + 2];

-	if( fabs(fdet) < 1e-6 ) return NULL;
+	if (fabs(fdet) < 1e-6) return NULL;

 	fdet = 1 / fdet;
+
 	//if( pfdet != NULL ) *pfdet = fdet;

-	if( pfres != pf ) {
-		pfres[0*stride+0] *= fdet;		pfres[0*stride+1] *= fdet;		pfres[0*stride+2] *= fdet;
-		pfres[1*stride+0] *= fdet;		pfres[1*stride+1] *= fdet;		pfres[1*stride+2] *= fdet;
-		pfres[2*stride+0] *= fdet;		pfres[2*stride+1] *= fdet;		pfres[2*stride+2] *= fdet;
+	if (pfres != pf)
+	{
+		pfres[0*stride+0] *= fdet;
+		pfres[0*stride+1] *= fdet;
+		pfres[0*stride+2] *= fdet;
+		pfres[1*stride+0] *= fdet;
+		pfres[1*stride+1] *= fdet;
+		pfres[1*stride+2] *= fdet;
+		pfres[2*stride+0] *= fdet;
+		pfres[2*stride+1] *= fdet;
+		pfres[2*stride+2] *= fdet;
 		return pfres;
 	}

 	pfres[0*stride+0] = pfres2[0*stride+0] * fdet;
+
 	pfres[0*stride+1] = pfres2[0*stride+1] * fdet;
 	pfres[0*stride+2] = pfres2[0*stride+2] * fdet;
 	pfres[1*stride+0] = pfres2[1*stride+0] * fdet;
@ -430,8 +475,11 @@ inline dReal* inv3(const dReal* pf, dReal* pfres, int stride) { return _inv3<dRe
 template <class T> inline T* _inv4(const T* pf, T* pfres)
 {
 	T* pfres2;
-	if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T));
-	else pfres2 = pfres;
+
+	if (pfres == pf) 
+		pfres2 = (T*)alloca(16 * sizeof(T));
+	else 
+		pfres2 = pfres;

 	// inverse = C^t / det(pf) where C is the matrix of coefficients

@ -439,7 +487,9 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)

 	// determinants of all possibel 2x2 submatrices formed by last two rows
 	T fd0, fd1, fd2;
+
 	T f1, f2, f3;
+
 	fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0];
 	fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1];
 	fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2];
@ -482,20 +532,24 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
 	pfres2[3*4 + 3] =   pf[2*4 + 0] * fd1 - pf[2*4 + 1] * f3 + pf[2*4 + 2] * fd0;

 	T fdet = pf[0*4 + 3] * pfres2[3*4 + 0] + pf[1*4 + 3] * pfres2[3*4 + 1] +
-			pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];
+			 pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];

-	if( fabs(fdet) < 1e-6) return NULL;
+	if (fabs(fdet) < 1e-6) return NULL;

 	fdet = 1 / fdet;
+
 	//if( pfdet != NULL ) *pfdet = fdet;

-	if( pfres2 == pfres ) {
+	if (pfres2 == pfres)
+	{
 		mult(pfres, fdet, 16);
 		return pfres;
 	}

 	int i = 0;
-	while(i < 16) {
+
+	while (i < 16)
+	{
 		pfres[i] = pfres2[i] * fdet;
 		++i;
 	}
@ -507,18 +561,26 @@ inline dReal* inv4(const dReal* pf, dReal* pfres) { return _inv4<dReal>(pf, pfre

 template <class T> inline T* _transpose3(const T* pf, T* pfres)
 {
-	assert( pf != NULL && pfres != NULL );
+	assert(pf != NULL && pfres != NULL);

-	if( pf == pfres ) {
+	if (pf == pfres)
+	{
 		rswap(pfres[1], pfres[3]);
 		rswap(pfres[2], pfres[6]);
 		rswap(pfres[5], pfres[7]);
 		return pfres;
 	}

-	pfres[0] = pf[0];	pfres[1] = pf[3];	pfres[2] = pf[6];
-	pfres[3] = pf[1];	pfres[4] = pf[4];	pfres[5] = pf[7];
-	pfres[6] = pf[2];	pfres[7] = pf[5];	pfres[8] = pf[8];
+	pfres[0] = pf[0];
+
+	pfres[1] = pf[3];
+	pfres[2] = pf[6];
+	pfres[3] = pf[1];
+	pfres[4] = pf[4];
+	pfres[5] = pf[7];
+	pfres[6] = pf[2];
+	pfres[7] = pf[5];
+	pfres[8] = pf[8];

 	return pfres;
 }
@ -528,9 +590,10 @@ inline double* transpose3(const double* pf, double* pfres) { return _transpose3(

 template <class T> inline T* _transpose4(const T* pf, T* pfres)
 {
-	assert( pf != NULL && pfres != NULL );
+	assert(pf != NULL && pfres != NULL);

-	if( pf == pfres ) {
+	if (pf == pfres)
+	{
 		rswap(pfres[1], pfres[4]);
 		rswap(pfres[2], pfres[8]);
 		rswap(pfres[3], pfres[12]);
@ -540,10 +603,23 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
 		return pfres;
 	}

-	pfres[0] = pf[0];	pfres[1] = pf[4];	pfres[2] = pf[8];		pfres[3] = pf[12];
-	pfres[4] = pf[1];	pfres[5] = pf[5];	pfres[6] = pf[9];		pfres[7] = pf[13];
-	pfres[8] = pf[2];	pfres[9] = pf[6];	pfres[10] = pf[10];		pfres[11] = pf[14];
-	pfres[12] = pf[3];	pfres[13] = pf[7];	pfres[14] = pf[11];		pfres[15] = pf[15];
+	pfres[0] = pf[0];
+
+	pfres[1] = pf[4];
+	pfres[2] = pf[8];
+	pfres[3] = pf[12];
+	pfres[4] = pf[1];
+	pfres[5] = pf[5];
+	pfres[6] = pf[9];
+	pfres[7] = pf[13];
+	pfres[8] = pf[2];
+	pfres[9] = pf[6];
+	pfres[10] = pf[10];
+	pfres[11] = pf[14];
+	pfres[12] = pf[3];
+	pfres[13] = pf[7];
+	pfres[14] = pf[11];
+	pfres[15] = pf[15];
 	return pfres;
 }

@ -552,37 +628,37 @@ inline double* transpose4(const double* pf, double* pfres) { return _transpose4(

 inline dReal dot2(const dReal* pf1, const dReal* pf2)
 {
-	assert( pf1 != NULL && pf2 != NULL );
+	assert(pf1 != NULL && pf2 != NULL);
 	return pf1[0]*pf2[0] + pf1[1]*pf2[1];
 }

 inline dReal dot3(const dReal* pf1, const dReal* pf2)
 {
-	assert( pf1 != NULL && pf2 != NULL );
+	assert(pf1 != NULL && pf2 != NULL);
 	return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2];
 }

 inline dReal dot4(const dReal* pf1, const dReal* pf2)
 {
-	assert( pf1 != NULL && pf2 != NULL );
+	assert(pf1 != NULL && pf2 != NULL);
 	return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2] + pf1[3] * pf2[3];
 }

 inline dReal lengthsqr2(const dReal* pf)
 {
-	assert( pf != NULL );
+	assert(pf != NULL);
 	return pf[0] * pf[0] + pf[1] * pf[1];
 }

 inline dReal lengthsqr3(const dReal* pf)
 {
-	assert( pf != NULL );
+	assert(pf != NULL);
 	return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];
 }

 inline dReal lengthsqr4(const dReal* pf)
 {
-	assert( pf != NULL );
+	assert(pf != NULL);
 	return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];
 }

@ -590,7 +666,7 @@ inline dReal* normalize2(dReal* pfout, const dReal* pf)
 {
 	assert(pf != NULL);

-	dReal f = pf[0]*pf[0] + pf[1]*pf[1];
+	dReal f = pf[0] * pf[0] + pf[1] * pf[1];
 	f = 1.0f / sqrtf(f);
 	pfout[0] = pf[0] * f;
 	pfout[1] = pf[1] * f;
@ -602,7 +678,7 @@ inline dReal* normalize3(dReal* pfout, const dReal* pf)
 {
 	assert(pf != NULL);

-	dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2];
+	dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];

 	f = 1.0f / sqrtf(f);
 	pfout[0] = pf[0] * f;
@ -616,7 +692,7 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf)
 {
 	assert(pf != NULL);

-	dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2] + pf[3]*pf[3];
+	dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];

 	f = 1.0f / sqrtf(f);
 	pfout[0] = pf[0] * f;
@ -629,22 +705,25 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf)

 inline dReal* cross3(dReal* pfout, const dReal* pf1, const dReal* pf2)
 {
-	assert( pfout != NULL && pf1 != NULL && pf2 != NULL );
+	assert(pfout != NULL && pf1 != NULL && pf2 != NULL);

 	dReal temp[3];
 	temp[0] = pf1[1] * pf2[2] - pf1[2] * pf2[1];
 	temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2];
 	temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0];

-	pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2];
+	pfout[0] = temp[0];
+	pfout[1] = temp[1];
+	pfout[2] = temp[2];
 	return pfout;
 }

 template <class T> inline void mult(T* pf, T fa, int r)
 {
-	assert( pf != NULL );
+	assert(pf != NULL);

-	while(r > 0) {
+	while (r > 0)
+	{
 		--r;
 		pf[r] *= fa;
 	}
@ -653,25 +732,32 @@ template <class T> inline void mult(T* pf, T fa, int r)
 template <class T, class S, class R>
 inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
 {
-	assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
+	assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
 	int j, k;

-	if( !badd ) memset(pfres, 0, sizeof(S) * r1 * c2);
+	if (!badd) memset(pfres, 0, sizeof(S) * r1 * c2);

-	while(r1 > 0) {
+	while (r1 > 0)
+	{
 		--r1;

 		j = 0;
-		while(j < c2) {
+
+		while (j < c2)
+		{
 			k = 0;
-			while(k < c1) {
+
+			while (k < c1)
+			{
 				pfres[j] += pf1[k] * pf2[k*c2 + j];
 				++k;
 			}
+
 			++j;
 		}

 		pf1 += c1;
+
 		pfres += c2;
 	}

@ -681,26 +767,32 @@ inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
 template <class T, class S, class R>
 inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
 {
-	assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
+	assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
 	int i, j, k;

-	if( !badd ) memset(pfres, 0, sizeof(S) * c1 * c2);
+	if (!badd) memset(pfres, 0, sizeof(S) * c1 * c2);

 	i = 0;
-	while(i < c1) {

+	while (i < c1)
+	{
 		j = 0;
-		while(j < c2) {

+		while (j < c2)
+		{
 			k = 0;
-			while(k < r1) {
+
+			while (k < r1)
+			{
 				pfres[j] += pf1[k*c1] * pf2[k*c2 + j];
 				++k;
 			}
+
 			++j;
 		}

 		pfres += c2;
+
 		++pf1;

 		++i;
@ -712,25 +804,32 @@ inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
 template <class T, class S, class R>
 inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool badd)
 {
-	assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
+	assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
 	int j, k;

-	if( !badd ) memset(pfres, 0, sizeof(S) * r1 * r2);
+	if (!badd) memset(pfres, 0, sizeof(S) * r1 * r2);

-	while(r1 > 0) {
+	while (r1 > 0)
+	{
 		--r1;

 		j = 0;
-		while(j < r2) {
+
+		while (j < r2)
+		{
 			k = 0;
-			while(k < c1) {
+
+			while (k < c1)
+			{
 				pfres[j] += pf1[k] * pf2[j*c1 + k];
 				++k;
 			}
+
 			++j;
 		}

 		pf1 += c1;
+
 		pfres += r2;
 	}

@ -739,88 +838,107 @@ inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool b

 template <class T> inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp)
 {
-	assert( pf1 != NULL && pf2 != NULL );
+	assert(pf1 != NULL && pf2 != NULL);

 	int j, k;
 	bool bdel = false;

-	if( pftemp == NULL ) {
+	if (pftemp == NULL)
+	{
 		pftemp = new T[c];
 		bdel = true;
 	}

-	while(r > 0) {
+	while (r > 0)
+	{
 		--r;

 		j = 0;
-		while(j < c) {
+
+		while (j < c)
+		{

 			pftemp[j] = 0.0;

 			k = 0;
-			while(k < c) {
+
+			while (k < c)
+			{
 				pftemp[j] += pf1[k] * pf2[k*c + j];
 				++k;
 			}
+
 			++j;
 		}

 		memcpy(pf1, pftemp, c * sizeof(T));
+
 		pf1 += c;
 	}

-	if( bdel ) delete[] pftemp;
+	if (bdel) delete[] pftemp;

 	return pf1;
 }

 template <class T, class S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S* pftemp)
 {
-	assert( pf1 != NULL && pf2 != NULL );
+	assert(pf1 != NULL && pf2 != NULL);

 	int i, j, k;
 	bool bdel = false;

-	if( pftemp == NULL ) {
+	if (pftemp == NULL)
+	{
 		pftemp = new S[r2];
 		bdel = true;
 	}

 	// do columns first
 	j = 0;
-	while(j < c2) {
+
+	while (j < c2)
+	{
 		i = 0;
-		while(i < r2) {
+
+		while (i < r2)
+		{

 			pftemp[i] = 0.0;

 			k = 0;
-			while(k < r2) {
+
+			while (k < r2)
+			{
 				pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j];
 				++k;
 			}
+
 			++i;
 		}

 		i = 0;
-		while(i < r2) {
-			*(pf2+i*c2+j) = pftemp[i];
+
+		while (i < r2)
+		{
+			*(pf2 + i*c2 + j) = pftemp[i];
 			++i;
 		}

 		++j;
 	}

-	if( bdel ) delete[] pftemp;
+	if (bdel) delete[] pftemp;

 	return pf1;
 }

 template <class T> inline void add(T* pf1, T* pf2, int r)
 {
-	assert( pf1 != NULL && pf2 != NULL);
+	assert(pf1 != NULL && pf2 != NULL);

-	while(r > 0) {
+	while (r > 0)
+	{
 		--r;
 		pf1[r] += pf2[r];
 	}
@ -828,9 +946,10 @@ template <class T> inline void add(T* pf1, T* pf2, int r)

 template <class T> inline void sub(T* pf1, T* pf2, int r)
 {
-	assert( pf1 != NULL && pf2 != NULL);
+	assert(pf1 != NULL && pf2 != NULL);

-	while(r > 0) {
+	while (r > 0)
+	{
 		--r;
 		pf1[r] -= pf2[r];
 	}
@ -838,10 +957,12 @@ template <class T> inline void sub(T* pf1, T* pf2, int r)

 template <class T> inline T normsqr(T* pf1, int r)
 {
-	assert( pf1 != NULL );
+	assert(pf1 != NULL);

 	T d = 0.0;
-	while(r > 0) {
+
+	while (r > 0)
+	{
 		--r;
 		d += pf1[r] * pf1[r];
 	}
@ -852,7 +973,9 @@ template <class T> inline T normsqr(T* pf1, int r)
 template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
 {
 	T d = 0;
-	while(length > 0) {
+
+	while (length > 0)
+	{
 		--length;
 		d += sqr(pf1[length] - pf2[length]);
 	}
@ -863,7 +986,9 @@ template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
 template <class T> inline T dot(T* pf1, T* pf2, int length)
 {
 	T d = 0;
-	while(length > 0) {
+
+	while (length > 0)
+	{
 		--length;
 		d += pf1[length] * pf2[length];
 	}
@ -874,7 +999,9 @@ template <class T> inline T dot(T* pf1, T* pf2, int length)
 template <class T> inline T sum(T* pf, int length)
 {
 	T d = 0;
-	while(length > 0) {
+
+	while (length > 0)
+	{
 		--length;
 		d += pf[length];
 	}
@ -886,18 +1013,23 @@ template <class T> inline bool inv2(T* pf, T* pfres)
 {
 	T fdet = pf[0] * pf[3] - pf[1] * pf[2];

-	if( fabs(fdet) < 1e-16 ) return false;
+	if (fabs(fdet) < 1e-16) return false;

 	fdet = 1 / fdet;
+
 	//if( pfdet != NULL ) *pfdet = fdet;

-	if( pfres != pf ) {
-		pfres[0] = fdet * pf[3];		pfres[1] = -fdet * pf[1];
-		pfres[2] = -fdet * pf[2];		pfres[3] = fdet * pf[0];
+	if (pfres != pf)
+	{
+		pfres[0] = fdet * pf[3];
+		pfres[1] = -fdet * pf[1];
+		pfres[2] = -fdet * pf[2];
+		pfres[3] = fdet * pf[0];
 		return true;
 	}

 	dReal ftemp = pf[0];
+
 	pfres[0] = pf[3] * fdet;
 	pfres[1] *= -fdet;
 	pfres[2] *= -fdet;