This is a dev commit: a cleanup in the pixel shader generator to make the generated code a lot cleaner and more readable, plus a small fill-rate optimization when using alpha test. I don't think any game is limited by fill rate, but the code looks cleaner and is ordered in the same way it is executed in hardware.

Please test; if any problem appears, let me know.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4812 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2010-01-12 03:39:14 +00:00
parent 573a7f4d6e
commit 532ab905c4
3 changed files with 118 additions and 112 deletions

View File

@ -481,6 +481,13 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
" float3 tevcoord;\n"
" float2 wrappedcoord, tempcoord;\n\n");
// HACK to handle cases where the tex gen is not enabled
if (numTexgen == 0)
{
WRITE(p, "float3 uv0 = float3(0.0f,0.0f,0.0f);\n");
}
else
{
for (int i = 0; i < numTexgen; ++i)
{
// optional perspective divides
@ -490,6 +497,7 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
// scale texture coordinates
WRITE(p, "uv%d.xy = uv%d.xy * "I_TEXDIMS"[%d].zw;\n", i, i, i);
}
}
// indirect texture map lookup
for(u32 i = 0; i < bpmem.genMode.numindstages; ++i)
@ -509,13 +517,22 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
}
}
// HACK to handle cases where the tex gen is not enabled
if (numTexgen == 0)
WRITE(p, "float3 uv0 = float3(0.0f,0.0f,0.0f);\n");
for (int i = 0; i < numStages; i++)
WriteStage(p, i, texture_mask,HLSL); //build the equation for this stage
WRITE(p, "prev = saturate(prev);\n");
if (!WriteAlphaTest(p, HLSL))
{
// alpha test will always fail, so restart the shader and just make it an empty function
p = pmainstart;
WRITE(p, "ocol0 = 0;\n");
WRITE(p, "discard;return;\n");
}
else
{
if (numTexgen >= 7)
WRITE(p, "float4 clipPos = float4(uv0.w, uv1.w, uv2.w, uv3.w);\n");
@ -537,21 +554,9 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL)
}
WRITE(p, "depth = zCoord;\n");
WRITE(p, "prev = saturate(prev);\n");
//if (bpmem.genMode.numindstages ) WRITE(p, "prev.rg = indtex0.xy;\nprev.b = 0;\n");
if (!WriteAlphaTest(p, HLSL))
{
// alpha test will always fail, so restart the shader and just make it an empty function
p = pmainstart;
WRITE(p, "ocol0 = 0;\n");
WRITE(p, "discard;return;\n");
}
else
{
if (dstAlphaEnable)
WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].w);\n");
WRITE(p, " ocol0 = float4(prev.rgb,"I_ALPHA"[0].a);\n");
else
{
WriteFog(p);
@ -581,14 +586,14 @@ static const char *TEVCMPColorOPTable[16] =
"float3(0.0f,0.0f,0.0f)",//5
"float3(0.0f,0.0f,0.0f)",//6
"float3(0.0f,0.0f,0.0f)",//7
" %s + ((%s.r > %s.r + (1.0f/510.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
" %s + ((abs(%s.r - %s.r) < (1.0f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
" %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
" %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
" %s + (max(sign(%s.rgb - %s.rgb - (1.0f/510.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
" %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (1.0f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
" %s + ((%s.r > %s.r + (0.25f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
" %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
" %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
" %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
" %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
" %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
};
//table with the alpha compare operations
@ -602,14 +607,14 @@ static const char *TEVCMPAlphaOPTable[16] =
"0.0f",//5
"0.0f",//6
"0.0f",//7
" %s + ((%s.r > (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
" %s + (abs(%s.r - %s.r) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
" %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
" %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13
" %s + ((%s.a > (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
" %s + (abs(%s.a - %s.a) < (1.0f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
" %s + ((%s.r > (%s.r + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
" %s + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
" %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
" %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13
" %s + ((%s.a > (%s.a + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
" %s + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
};
@ -729,32 +734,37 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
if (cc.clamp)
WRITE(p, "%s=saturate(", tevCOutputTable[cc.dest]);
else
WRITE(p, "%s= (", tevCOutputTable[cc.dest]);
WRITE(p, "%s=", tevCOutputTable[cc.dest]);
// combine the color channel
if (cc.bias != 3) // if not compare
{
//normal color combiner goes here
if (cc.shift>0)
WRITE(p, " %s*(%s%s",tevScaleTable[cc.shift],tevCInputTable[cc.d],tevOpTable[cc.op]);
else
WRITE(p, " (%s%s",tevCInputTable[cc.d],tevOpTable[cc.op]);
WRITE(p, "%s*(",tevScaleTable[cc.shift]);
if(!(cc.d == 15 && cc.op == 0))
WRITE(p, "%s%s",tevCInputTable[cc.d],tevOpTable[cc.op]);
if (cc.a == 15 && cc.b == 15)
WRITE(p, "0.0f");
WRITE(p, "float3(0.0f,0.0f,0.0f)");
else if (cc.a == 15 && cc.c == 15)
WRITE(p, "0.0f");
WRITE(p, "float3(0.0f,0.0f,0.0f)");
else if (cc.b == 15 && cc.c == 15)
WRITE(p,"%s",tevCInputTable[cc.a]);
else if (cc.a == 15)
WRITE(p,"(%s)*(%s)",tevCInputTable[cc.b],tevCInputTable[cc.c]);
WRITE(p,"%s*%s",tevCInputTable[cc.b],tevCInputTable[cc.c]);
else if (cc.b == 15)
WRITE(p,"(%s)*(1-%s)",tevCInputTable[cc.a],tevCInputTable[cc.c]);
WRITE(p,"%s*(float3(1.0f,1.0f,1.0f)-%s)",tevCInputTable[cc.a],tevCInputTable[cc.c]);
else if (cc.c == 15)
WRITE(p,"%s",tevCInputTable[cc.a]);
else
WRITE(p, "lerp(%s,%s,%s)",tevCInputTable[cc.a], tevCInputTable[cc.b],tevCInputTable[cc.c]);
WRITE(p, " %s)",tevBiasTable[cc.bias]);
WRITE(p, "%s",tevBiasTable[cc.bias]);
if(cc.shift>0)
WRITE(p, ")");
}
else
{
@ -765,22 +775,24 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
tevCInputTable2[cc.b],
tevCInputTable[cc.c]);
}
WRITE(p,");\n");
if (cc.clamp)
WRITE(p,")");
WRITE(p,";\n");
// combine the alpha channel
if (ac.clamp)
WRITE(p, "%s=saturate(", tevAOutputTable[ac.dest]);
else
WRITE(p, "%s= (", tevAOutputTable[ac.dest]);
WRITE(p, "%s=", tevAOutputTable[ac.dest]);
if (ac.bias != 3) // if not compare
{
//normal alpha combiner goes here
if (ac.shift>0)
WRITE(p, " %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]);
else
WRITE(p, " (%s%s",tevAInputTable[ac.d],tevOpTable[ac.op]);
WRITE(p, "%s*(",tevScaleTable[ac.shift]);
if(!(ac.d == 7 && ac.op == 0))
WRITE(p, "%s%s",tevAInputTable[ac.d],tevOpTable[ac.op]);
if (ac.a == 7 && ac.b == 7)
WRITE(p, "0.0f");
@ -789,14 +801,18 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
else if (ac.b == 7 && ac.c == 7)
WRITE(p,"%s",tevAInputTable[ac.a]);
else if (ac.a == 7)
WRITE(p,"(%s)*(%s)",tevAInputTable[ac.b],tevAInputTable[ac.c]);
WRITE(p,"%s*%s",tevAInputTable[ac.b],tevAInputTable[ac.c]);
else if (ac.b == 7)
WRITE(p,"(%s)*(1-%s)",tevAInputTable[ac.a],tevAInputTable[ac.c]);
WRITE(p,"%s*(1.0f-%s)",tevAInputTable[ac.a],tevAInputTable[ac.c]);
else if (ac.c == 7)
WRITE(p,"%s",tevAInputTable[ac.a]);
else
WRITE(p, "lerp(%s,%s,%s)",tevAInputTable[ac.a],tevAInputTable[ac.b],tevAInputTable[ac.c]);
WRITE(p, " %s)",tevBiasTable[ac.bias]);
WRITE(p, "%s",tevBiasTable[ac.bias]);
if (ac.shift>0)
WRITE(p, ")");
}
else
{
@ -808,8 +824,9 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
tevAInputTable2[ac.b],
tevAInputTable[ac.c]);
}
WRITE(p, ");\n\n");
if (ac.clamp)
WRITE(p, ")");
WRITE(p, ";\n\n");
}
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, u32 HLSL)
@ -854,12 +871,12 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
static const char *tevAlphaFuncsTable[] =
{
"(false)", //ALPHACMP_NEVER 0
"(prev.a < %s - (0.5f/255.0f))", //ALPHACMP_LESS 1
"(abs( prev.a - %s ) < (1.0f/255.0f))", //ALPHACMP_EQUAL 2
"(prev.a < %s + (0.5f/255.0f))", //ALPHACMP_LEQUAL 3
"(prev.a > %s + (0.5f/255.0f))", //ALPHACMP_GREATER 4
"(abs( prev.a - %s ) > (1.0f/255.0f))", //ALPHACMP_NEQUAL 5
"(prev.a > %s - (0.5f/255.0f))", //ALPHACMP_GEQUAL 6
"(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1
"(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2
"(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3
"(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4
"(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5
"(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6
"(true)" //ALPHACMP_ALWAYS 7
};
@ -903,7 +920,7 @@ static bool WriteAlphaTest(char *&p, u32 HLSL)
}
// Seems we need discard for Cg and clip for d3d. sigh.
// using discard then return works the same in cg and hlsl
WRITE(p, "if(!( ");
int compindex = bpmem.alphaFunc.comp0 % 8;
@ -948,7 +965,6 @@ static void WriteFog(char *&p)
WRITE (p, " float ze = "I_FOG"[1].x * depth;\n");
}
//WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n");
WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n");
if(bpmem.fog.c_proj_fsel.fsel > 3)

View File

@ -331,6 +331,7 @@ const char *GenerateVertexShader(u32 components, bool D3D)
(xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
// transform texcoords
WRITE(p, "float4 coord = float4(0.0f,0.0f,1.0f,1.0f);\n");
for (int i = 0; i < xfregs.numTexGens; ++i) {
TexMtxInfo& texinfo = xfregs.texcoords[i].texmtxinfo;
@ -338,14 +339,13 @@ const char *GenerateVertexShader(u32 components, bool D3D)
switch (texinfo.sourcerow) {
case XF_SRCGEOM_INROW:
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "float4 coord = rawpos;\n"); // pos.w is 1
WRITE(p, "coord = rawpos;\n"); // pos.w is 1
break;
case XF_SRCNORMAL_INROW:
if (components & VB_HAS_NRM0) {
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "float4 coord = float4(rawnorm0.xyz, 1.0f);\n");
WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n");
}
else WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); // avoid errors
break;
case XF_SRCCOLORS_INROW:
_assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 );
@ -353,23 +353,19 @@ const char *GenerateVertexShader(u32 components, bool D3D)
case XF_SRCBINORMAL_T_INROW:
if (components & VB_HAS_NRM1) {
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "float4 coord = float4(rawnorm1.xyz, 1.0f);\n");
WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n");
}
else WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); // avoid errors
break;
case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2) {
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "float4 coord = float4(rawnorm2.xyz, 1.0f);\n");
WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n");
}
else WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); // avoid errors
break;
default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) )
WRITE(p, "float4 coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
else
WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); // avoid errors
WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break;
}
@ -497,7 +493,7 @@ char* GenerateLightShader(char* p, int index, const LitChannel& chan, const char
WRITE(p, "attn = max(0.0f, dot("I_LIGHTS".lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot("I_LIGHTS".lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", index, index);
}
else if (chan.attnfunc == 1) { // specular
WRITE(p, "attn = dot(_norm0, "I_LIGHTS".lights[%d].pos.xyz) > 0.0f ? max(0.0f, dot(_norm0, "I_LIGHTS".lights[%d].dir.xyz)) : 0.0f;\n", index, index);
WRITE(p, "attn = (dot(_norm0, "I_LIGHTS".lights[%d].pos.xyz) > 0.0f) ? max(0.0f, dot(_norm0, "I_LIGHTS".lights[%d].dir.xyz)) : 0.0f;\n", index, index);
WRITE(p, "ldir = float3(1,attn,attn*attn);\n");
WRITE(p, "attn = max(0.0f, dot("I_LIGHTS".lights[%d].cosatt.xyz, ldir)) / dot("I_LIGHTS".lights[%d].distatt.xyz, ldir);\n", index, index);
}

View File

@ -296,19 +296,13 @@ void Flush()
}
// update alpha only
D3D::SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, false);
D3D::dev->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_ALPHA);
D3D::dev->SetRenderState(D3DRS_ALPHABLENDENABLE, false);
Draw(stride);
if (bpmem.blendmode.alphaupdate)
write = D3DCOLORWRITEENABLE_ALPHA;
if (bpmem.blendmode.colorupdate)
write |= D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE;
if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract)
D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, true);
D3D::SetRenderState(D3DRS_COLORWRITEENABLE, write);
D3D::RefreshRenderState(D3DRS_COLORWRITEENABLE);
D3D::RefreshRenderState(D3DRS_ALPHABLENDENABLE);
}
DEBUGGER_PAUSE_AT(NEXT_FLUSH,true);