From 532ab905c452bd848e9c82c57dbd68f51b22c02e Mon Sep 17 00:00:00 2001
From: Rodolfo Osvaldo Bogado <rodolfoosvaldobogado@gmail.com>
Date: Tue, 12 Jan 2010 03:39:14 +0000
Subject: [PATCH] this is a dev commit, a cleanup in the pixel shader generator
 to make the generated code a lot cleaner and more readable, and a little fill
 rate optimization when using alpha test. i don't think any game is limited by
 fill rate, but the code looks cleaner and is ordered in the same way it is
 executed in hardware. please test; if any problem appears, let me know

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4812 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 .../Core/VideoCommon/Src/PixelShaderGen.cpp   | 188 ++++++++++--------
 .../Core/VideoCommon/Src/VertexShaderGen.cpp  |  24 +--
 .../Plugin_VideoDX9/Src/VertexManager.cpp     |  18 +-
 3 files changed, 118 insertions(+), 112 deletions(-)

diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
index 6b472eb317..5bd2ff6f4b 100644
--- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
@@ -481,15 +481,23 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
             "  float3 tevcoord;\n"
             "  float2 wrappedcoord, tempcoord;\n\n");
 
-    for (int i = 0; i < numTexgen; ++i) 
+	// HACK to handle cases where the tex gen is not enabled
+    if (numTexgen == 0)
 	{
-        // optional perspective divides
-        if (xfregs.texcoords[i].texmtxinfo.projection == XF_TEXPROJ_STQ)
-            WRITE(p, "uv%d.xy = uv%d.xy/uv%d.z;\n", i, i, i);
+        WRITE(p, "float3 uv0 = float3(0.0f,0.0f,0.0f);\n");
+	}
+	else
+	{
+		for (int i = 0; i < numTexgen; ++i) 
+		{
+			// optional perspective divides
+			if (xfregs.texcoords[i].texmtxinfo.projection == XF_TEXPROJ_STQ)
+				WRITE(p, "uv%d.xy = uv%d.xy/uv%d.z;\n", i, i, i);
 
-        // scale texture coordinates
-        WRITE(p, "uv%d.xy = uv%d.xy * "I_TEXDIMS"[%d].zw;\n", i, i, i);        
-    }
+			// scale texture coordinates
+			WRITE(p, "uv%d.xy = uv%d.xy * "I_TEXDIMS"[%d].zw;\n", i, i, i);        
+		}
+	}
 
     // indirect texture map lookup
     for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) 
@@ -509,38 +517,13 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
         }
     }
 
-    // HACK to handle cases where the tex gen is not enabled
-    if (numTexgen == 0)
-        WRITE(p, "float3 uv0 = float3(0.0f,0.0f,0.0f);\n");
-
-    for (int i = 0; i < numStages; i++)
-        WriteStage(p, i, texture_mask,HLSL); //build the equation for this stage
-
-	if (numTexgen >= 7)
-		WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n");
-
-	// the screen space depth value = far z + (clip z / clip w) * z range
-	WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n");
-
-    if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)
-    {
-        // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
-        if (bpmem.ztex2.op == ZTEXTURE_ADD)
-            WRITE(p, "zCoord = dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord;\n");
-        else
-            WRITE(p, "zCoord = dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w;\n");
-
-        // scale to make result from frac correct
-        WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
-        WRITE(p, "zCoord = frac(zCoord);\n");
-        WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
-    }
     
-    WRITE(p, "depth = zCoord;\n");
+
+	for (int i = 0; i < numStages; i++)
+        WriteStage(p, i, texture_mask,HLSL); //build the equation for this stage
 	WRITE(p, "prev = saturate(prev);\n");
-
-    //if (bpmem.genMode.numindstages ) WRITE(p, "prev.rg = indtex0.xy;\nprev.b = 0;\n");
-
+	
+		
     if (!WriteAlphaTest(p, HLSL))
 	{
         // alpha test will always fail, so restart the shader and just make it an empty function
@@ -550,8 +533,30 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
     }
     else
 	{
-        if (dstAlphaEnable) 
-            WRITE(p, "  ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n");
+		if (numTexgen >= 7)
+			WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n");
+
+		// the screen space depth value = far z + (clip z / clip w) * z range
+		WRITE(p, "float zCoord = "I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * "I_ZBIAS"[1].y;\n");
+
+		if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)
+		{
+			// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
+			if (bpmem.ztex2.op == ZTEXTURE_ADD)
+				WRITE(p, "zCoord = dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w + zCoord;\n");
+			else
+				WRITE(p, "zCoord = dot("I_ZBIAS"[0].xyzw, textemp.xyzw) + "I_ZBIAS"[1].w;\n");
+
+			// scale to make result from frac correct
+			WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
+			WRITE(p, "zCoord = frac(zCoord);\n");
+			WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
+		}
+	    
+		WRITE(p, "depth = zCoord;\n");
+        
+		if (dstAlphaEnable) 
+            WRITE(p, "  ocol0 = float4(prev.rgb,"I_ALPHA"[0].a);\n");
 		else
 		{
             WriteFog(p);
@@ -581,14 +586,14 @@ static const char *TEVCMPColorOPTable[16] =
 	"float3(0.0f,0.0f,0.0f)",//5
 	"float3(0.0f,0.0f,0.0f)",//6
 	"float3(0.0f,0.0f,0.0f)",//7
-	"   %s + ((%s.r > %s.r + (1.0f/510.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
-	"   %s + ((abs(%s.r - %s.r) < (1.0f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
-	"   %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
-	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
-	"   %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
-	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
-	"   %s + (max(sign(%s.rgb - %s.rgb - (1.0f/510.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT  14
-	"   %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (1.0f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ  15
+	"   %s + ((%s.r > %s.r + (0.25f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
+	"   %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
+	"   %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
+	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
+	"   %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
+	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
+	"   %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT  14
+	"   %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ  15
 };
 
 //table with the alpha compare operations
@@ -602,14 +607,14 @@ static const char *TEVCMPAlphaOPTable[16] =
 	"0.0f",//5
 	"0.0f",//6
 	"0.0f",//7
-	"   %s + ((%s.r > (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
-	"   %s + (abs(%s.r - %s.r) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
-	"   %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
-	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
-	"   %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
-	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13	
-	"   %s + ((%s.a > (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
-	"   %s + (abs(%s.a - %s.a) < (1.0f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
+	"   %s + ((%s.r > (%s.r + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
+	"   %s + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
+	"   %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
+	"   %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
+	"   %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
+	"   %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13	
+	"   %s + ((%s.a > (%s.a + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
+	"   %s + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
 
 };
 
@@ -727,34 +732,39 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
         WRITE(p, "konsttemp=float4(%s,%s);\n",tevKSelTableC[kc],tevKSelTableA[ka]);  
 
     if (cc.clamp)
-		WRITE(p, "%s= saturate(", tevCOutputTable[cc.dest]);
+		WRITE(p, "%s=saturate(", tevCOutputTable[cc.dest]);
 	else
-		WRITE(p, "%s= (", tevCOutputTable[cc.dest]);
+		WRITE(p, "%s=", tevCOutputTable[cc.dest]);
 
     // combine the color channel
     if (cc.bias != 3) // if not compare
 	{
 		//normal color combiner goes here        
 		if (cc.shift>0)
-			WRITE(p, "   %s*(%s%s",tevScaleTable[cc.shift],tevCInputTable[cc.d],tevOpTable[cc.op]);
-		else
-			WRITE(p, "   (%s%s",tevCInputTable[cc.d],tevOpTable[cc.op]);
+			WRITE(p, "%s*(",tevScaleTable[cc.shift]);		
+
+		if(!(cc.d == 15 && cc.op == 0))
+			WRITE(p, "%s%s",tevCInputTable[cc.d],tevOpTable[cc.op]);
 
 		if (cc.a == 15 && cc.b == 15)
-			WRITE(p, "0.0f");
+			WRITE(p, "float3(0.0f,0.0f,0.0f)");
 		else if (cc.a == 15 && cc.c == 15)
-			WRITE(p, "0.0f");
+			WRITE(p, "float3(0.0f,0.0f,0.0f)");
 		else if (cc.b == 15 && cc.c == 15)
 			WRITE(p,"%s",tevCInputTable[cc.a]);
 		else if (cc.a == 15)
-			WRITE(p,"(%s)*(%s)",tevCInputTable[cc.b],tevCInputTable[cc.c]);
+			WRITE(p,"%s*%s",tevCInputTable[cc.b],tevCInputTable[cc.c]);
 		else if (cc.b == 15)
-			WRITE(p,"(%s)*(1-%s)",tevCInputTable[cc.a],tevCInputTable[cc.c]);
+			WRITE(p,"%s*(float3(1.0f,1.0f,1.0f)-%s)",tevCInputTable[cc.a],tevCInputTable[cc.c]);
 		else if (cc.c == 15)
 			WRITE(p,"%s",tevCInputTable[cc.a]);
 		else
 			WRITE(p, "lerp(%s,%s,%s)",tevCInputTable[cc.a], tevCInputTable[cc.b],tevCInputTable[cc.c]);
-		WRITE(p, " %s)",tevBiasTable[cc.bias]);
+		
+		WRITE(p, "%s",tevBiasTable[cc.bias]);
+		
+		if(cc.shift>0)
+			WRITE(p, ")");
     }
     else 
 	{
@@ -765,22 +775,24 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 				tevCInputTable2[cc.b],
 				tevCInputTable[cc.c]);       
     }
-
-	WRITE(p,");\n");
+	if (cc.clamp)
+		WRITE(p,")");
+	WRITE(p,";\n");
     
     // combine the alpha channel
     if (ac.clamp)
-	    WRITE(p, "%s= saturate(", tevAOutputTable[ac.dest]);
+	    WRITE(p, "%s=saturate(", tevAOutputTable[ac.dest]);
 	else
-		WRITE(p, "%s= (", tevAOutputTable[ac.dest]);
+		WRITE(p, "%s=", tevAOutputTable[ac.dest]);
 
     if (ac.bias != 3) // if not compare
 	{
-        //normal alpha combiner goes here		
+        //normal alpha combiner goes here
 		if (ac.shift>0)
-			WRITE(p, "   %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]);
-		else
-			WRITE(p, "   (%s%s",tevAInputTable[ac.d],tevOpTable[ac.op]);
+			WRITE(p, "%s*(",tevScaleTable[ac.shift]);		
+
+		if(!(ac.d == 7 && ac.op == 0))
+			WRITE(p, "%s%s",tevAInputTable[ac.d],tevOpTable[ac.op]);		
 
 		if (ac.a == 7 && ac.b == 7)
 			WRITE(p, "0.0f");
@@ -789,14 +801,18 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 		else if (ac.b == 7 && ac.c == 7)
 			WRITE(p,"%s",tevAInputTable[ac.a]);
 		else if (ac.a == 7)
-			WRITE(p,"(%s)*(%s)",tevAInputTable[ac.b],tevAInputTable[ac.c]);
+			WRITE(p,"%s*%s",tevAInputTable[ac.b],tevAInputTable[ac.c]);
 		else if (ac.b == 7)
-			WRITE(p,"(%s)*(1-%s)",tevAInputTable[ac.a],tevAInputTable[ac.c]);
+			WRITE(p,"%s*(1.0f-%s)",tevAInputTable[ac.a],tevAInputTable[ac.c]);
 		else if (ac.c == 7)
 			WRITE(p,"%s",tevAInputTable[ac.a]);
 		else
 	        WRITE(p, "lerp(%s,%s,%s)",tevAInputTable[ac.a],tevAInputTable[ac.b],tevAInputTable[ac.c]);
-		WRITE(p, " %s)",tevBiasTable[ac.bias]);
+		
+		WRITE(p, "%s",tevBiasTable[ac.bias]);
+		
+		if (ac.shift>0)
+			WRITE(p, ")");
     }
     else 
 	{
@@ -808,8 +824,9 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
 				tevAInputTable2[ac.b],
 				tevAInputTable[ac.c]);       		
     }
-
-    WRITE(p, ");\n\n");
+	if (ac.clamp)
+		WRITE(p, ")");
+	WRITE(p, ";\n\n");
 }
 
 void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, u32 HLSL)
@@ -854,12 +871,12 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
 static const char *tevAlphaFuncsTable[] = 
 {
     "(false)",									//ALPHACMP_NEVER 0
-	"(prev.a < %s - (0.5f/255.0f))",			//ALPHACMP_LESS 1
-	"(abs( prev.a - %s ) < (1.0f/255.0f))",		//ALPHACMP_EQUAL 2
-	"(prev.a < %s + (0.5f/255.0f))",			//ALPHACMP_LEQUAL 3
-	"(prev.a > %s + (0.5f/255.0f))",			//ALPHACMP_GREATER 4
-	"(abs( prev.a - %s ) > (1.0f/255.0f))",		//ALPHACMP_NEQUAL 5
-	"(prev.a > %s - (0.5f/255.0f))",			//ALPHACMP_GEQUAL 6
+	"(prev.a <= %s - (0.25f/255.0f))",			//ALPHACMP_LESS 1
+	"(abs( prev.a - %s ) < (0.5f/255.0f))",		//ALPHACMP_EQUAL 2
+	"(prev.a < %s + (0.25f/255.0f))",			//ALPHACMP_LEQUAL 3
+	"(prev.a >= %s + (0.25f/255.0f))",			//ALPHACMP_GREATER 4
+	"(abs( prev.a - %s ) >= (0.5f/255.0f))",	//ALPHACMP_NEQUAL 5
+	"(prev.a > %s - (0.25f/255.0f))",			//ALPHACMP_GEQUAL 6
 	"(true)"									//ALPHACMP_ALWAYS 7
 };
 
@@ -903,7 +920,7 @@ static bool WriteAlphaTest(char *&p, u32 HLSL)
     }
 
 
-	// Seems we need discard for Cg and clip for d3d. sigh.
+	// using discard then return works the same in cg and hlsl
 	WRITE(p, "if(!( ");
 
 	int compindex = bpmem.alphaFunc.comp0 % 8;
@@ -948,7 +965,6 @@ static void WriteFog(char *&p)
         WRITE (p, "  float ze = "I_FOG"[1].x * depth;\n");
     }
 
-    //WRITE (p, "  float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n");
 	WRITE (p, "  float fog = saturate(ze - "I_FOG"[1].z);\n");
 
 	if(bpmem.fog.c_proj_fsel.fsel > 3)
diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp
index fcba116bf2..ad8d227813 100644
--- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp
@@ -331,6 +331,7 @@ const char *GenerateVertexShader(u32 components, bool D3D)
 		(xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
 
     // transform texcoords
+	WRITE(p, "float4 coord = float4(0.0f,0.0f,1.0f,1.0f);\n"); 
     for (int i = 0; i < xfregs.numTexGens; ++i) {
         TexMtxInfo& texinfo = xfregs.texcoords[i].texmtxinfo;
 
@@ -338,14 +339,13 @@ const char *GenerateVertexShader(u32 components, bool D3D)
         switch (texinfo.sourcerow) {
         case XF_SRCGEOM_INROW:
             _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
-            WRITE(p, "float4 coord = rawpos;\n"); // pos.w is 1
+            WRITE(p, "coord = rawpos;\n"); // pos.w is 1
             break;
         case XF_SRCNORMAL_INROW:
             if (components & VB_HAS_NRM0) {
                 _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
-                WRITE(p, "float4 coord = float4(rawnorm0.xyz, 1.0f);\n");
-            }
-            else WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");  // avoid errors
+                WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n");
+            }            
             break;
         case XF_SRCCOLORS_INROW:
             _assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 );
@@ -353,23 +353,19 @@ const char *GenerateVertexShader(u32 components, bool D3D)
         case XF_SRCBINORMAL_T_INROW:
             if (components & VB_HAS_NRM1) {
                 _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
-                WRITE(p, "float4 coord = float4(rawnorm1.xyz, 1.0f);\n");
-            }
-            else WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");  // avoid errors
+                WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n");
+            }            
             break;
         case XF_SRCBINORMAL_B_INROW:
             if (components & VB_HAS_NRM2) {
                 _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
-                WRITE(p, "float4 coord = float4(rawnorm2.xyz, 1.0f);\n");
-            }
-            else WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");  // avoid errors
+                WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n");
+            }            
             break;
         default:
             _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
             if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) )
-                WRITE(p, "float4 coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
-            else
-                WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");  // avoid errors
+                WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);            
             break;
         }
 
@@ -497,7 +493,7 @@ char* GenerateLightShader(char* p, int index, const LitChannel& chan, const char
             WRITE(p, "attn = max(0.0f, dot("I_LIGHTS".lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot("I_LIGHTS".lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", index, index);
         }
         else if (chan.attnfunc == 1) { // specular
-            WRITE(p, "attn = dot(_norm0, "I_LIGHTS".lights[%d].pos.xyz) > 0.0f ? max(0.0f, dot(_norm0, "I_LIGHTS".lights[%d].dir.xyz)) : 0.0f;\n", index, index);
+            WRITE(p, "attn = (dot(_norm0, "I_LIGHTS".lights[%d].pos.xyz) > 0.0f) ? max(0.0f, dot(_norm0, "I_LIGHTS".lights[%d].dir.xyz)) : 0.0f;\n", index, index);
             WRITE(p, "ldir = float3(1,attn,attn*attn);\n");
             WRITE(p, "attn = max(0.0f, dot("I_LIGHTS".lights[%d].cosatt.xyz, ldir)) / dot("I_LIGHTS".lights[%d].distatt.xyz, ldir);\n", index, index);
         }
diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
index cde1200475..651515c901 100644
--- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
@@ -283,7 +283,7 @@ void Flush()
 
 	int stride = g_nativeVertexFmt->GetVertexStride();
 	g_nativeVertexFmt->SetupVertexPointers();
-
+	
 	Draw(stride);
 
 	if (bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) 
@@ -296,19 +296,13 @@ void Flush()
 		}
 
 		// update alpha only
-		D3D::SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
-		D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, false);
-
+		D3D::dev->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
+		D3D::dev->SetRenderState(D3DRS_ALPHABLENDENABLE, false);
+		
 		Draw(stride);
 
-		if (bpmem.blendmode.alphaupdate) 
-			write = D3DCOLORWRITEENABLE_ALPHA;
-		if (bpmem.blendmode.colorupdate) 
-			write |= D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE;
-		if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract)
-			D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, true);
-
-		D3D::SetRenderState(D3DRS_COLORWRITEENABLE, write);
+		D3D::RefreshRenderState(D3DRS_COLORWRITEENABLE);
+		D3D::RefreshRenderState(D3DRS_ALPHABLENDENABLE);		
 	}
 	DEBUGGER_PAUSE_AT(NEXT_FLUSH,true);