Add some code for generating a shader constant usage profile.

This commit is contained in:
NeoBrainX 2012-09-02 20:00:15 +02:00 committed by NeoBrainX
parent 700cce9588
commit 0fdeb81038
6 changed files with 163 additions and 42 deletions

View File

@ -299,9 +299,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
} }
out.Write("\n"); out.Write("\n");
out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); // TODO: first element not used??
out.Write("uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); out.Write("uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS);
out.Write("uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); out.Write("uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); // TODO: Why is this an array...-.-
out.Write("uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); out.Write("uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS);
out.Write("uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); out.Write("uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS);
out.Write("uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); out.Write("uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE);
@ -390,6 +390,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
"float3 ldir, h;\n" "float3 ldir, h;\n"
"float dist, dist2, attn;\n"); "float dist, dist2, attn;\n");
/// TODO /// TODO
out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further
out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
/// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); /// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_");
} }
@ -405,6 +407,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
} }
else else
{ {
out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS+numTexgen-1);
for (unsigned int i = 0; i < numTexgen; ++i) for (unsigned int i = 0; i < numTexgen; ++i)
{ {
// optional perspective divides // optional perspective divides
@ -450,7 +453,10 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
SetUidField(tevindref.bi4, texmap); SetUidField(tevindref.bi4, texmap);
} }
if (texcoord < numTexgen) if (texcoord < numTexgen)
{
out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
out.Write("tempcoord = uv%d.xy * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy"); out.Write("tempcoord = uv%d.xy * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
}
else else
out.Write("tempcoord = float2(0.0f, 0.0f);\n"); out.Write("tempcoord = float2(0.0f, 0.0f);\n");
@ -504,12 +510,14 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
WriteAlphaTest<T, type>(out, ApiType, dstAlphaMode); WriteAlphaTest<T, type>(out, ApiType, dstAlphaMode);
// the screen space depth value = far z + (clip z / clip w) * z range // the screen space depth value = far z + (clip z / clip w) * z range
out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1);
out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n");
// Note: depth textures are disabled if early depth test is enabled // Note: depth textures are disabled if early depth test is enabled
if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable)
{ {
// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1);
out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
(bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : "");
@ -521,7 +529,10 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
out.Write("depth = zCoord;\n"); out.Write("depth = zCoord;\n");
if (dstAlphaMode == DSTALPHA_ALPHA_PASS) if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
out.Write(" ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); out.Write(" ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n");
}
else else
{ {
WriteFog<T, type>(out); WriteFog<T, type>(out);
@ -532,6 +543,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u
// single pass // single pass
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
{ {
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
// Colors will be blended against the alpha from ocol1... // Colors will be blended against the alpha from ocol1...
out.Write(" ocol1 = ocol0;\n"); out.Write(" ocol1 = ocol0;\n");
// ...and the alpha from ocol0 will be written to the framebuffer. // ...and the alpha from ocol0 will be written to the framebuffer.
@ -639,6 +651,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
if (bpmem.tevind[n].mid <= 3) if (bpmem.tevind[n].mid <= 3)
{ {
int mtxidx = 2*(bpmem.tevind[n].mid-1); int mtxidx = 2*(bpmem.tevind[n].mid-1);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n",
n, mtxidx, n, mtxidx+1, n); n, mtxidx, n, mtxidx+1, n);
} }
@ -646,12 +659,14 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
{ // s matrix { // s matrix
_assert_(bpmem.tevind[n].mid >= 5); _assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5); int mtxidx = 2*(bpmem.tevind[n].mid-5);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
} }
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix { // t matrix
_assert_(bpmem.tevind[n].mid >= 9); _assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9); int mtxidx = 2*(bpmem.tevind[n].mid-9);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
} }
else else
@ -757,6 +772,10 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
{ {
out.Write("ckonsttemp = konsttemp;\n"); out.Write("ckonsttemp = konsttemp;\n");
} }
if (kc > 7)
out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4));
if (ka > 7)
out.SetConstantsUsed(C_KCOLORS+((ka-0xc)%4),C_KCOLORS+((ka-0xc)%4));
} }
if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV
@ -782,6 +801,8 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
|| cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0
|| ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0) || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0)
{ {
// TODO: WTF?
out.SetConstantsUsed(C_COLORS+1,C_COLORS+1);
if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
{ {
out.Write("cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); out.Write("cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
@ -800,6 +821,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
|| cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1 || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1
|| ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1) || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1)
{ {
out.SetConstantsUsed(C_COLORS+2,C_COLORS+2);
if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
{ {
out.Write("cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); out.Write("cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
@ -818,6 +840,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
|| cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2 || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2
|| ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2) || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2)
{ {
out.SetConstantsUsed(C_COLORS+3,C_COLORS+3);
if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
{ {
out.Write("cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); out.Write("cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
@ -834,6 +857,28 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
RegisterStates[cc.dest].AuxStored = false; RegisterStates[cc.dest].AuxStored = false;
/* if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0)
{
out.SetConstantsUsed(C_COLORS+1,C_COLORS+1);
// TODO: 11 bit signed overflow..
}
if (cc.d == TEVCOLORARG_C1 || cc.d == TEVCOLORARG_A1 || ac.d == TEVALPHAARG_A1)
{
out.SetConstantsUsed(C_COLORS+2,C_COLORS+2);
// TODO: 11 bit signed overflow..
}
if (cc.d == TEVCOLORARG_C2 || cc.d == TEVCOLORARG_A2 || ac.d == TEVALPHAARG_A2)
{
out.SetConstantsUsed(C_COLORS+3,C_COLORS+3);
// TODO: 11 bit signed overflow..
}*/
// TODO: Are there enums for this?
if (cc.dest >= 1 && cc.dest <= 3)
out.SetConstantsUsed(C_COLORS+cc.dest, C_COLORS+cc.dest);
if (ac.dest >= 1 && ac.dest <= 3)
out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest);
out.Write("// color combine\n"); out.Write("// color combine\n");
if (cc.clamp) if (cc.clamp)
out.Write("%s = saturate(", tevCOutputTable[cc.dest]); out.Write("%s = saturate(", tevCOutputTable[cc.dest]);
@ -935,6 +980,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType)
template<class T, GenOutput type> template<class T, GenOutput type>
void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
{ {
out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap);
if (ApiType == API_D3D11) if (ApiType == API_D3D11)
out.Write("%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); out.Write("%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap);
else else
@ -970,6 +1016,8 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode)
I_ALPHA"[0].g" I_ALPHA"[0].g"
}; };
out.SetConstantsUsed(C_ALPHA, C_ALPHA);
// using discard then return works the same in cg and dx9 but not in dx11 // using discard then return works the same in cg and dx9 but not in dx11
out.Write("if(!( "); out.Write("if(!( ");
@ -1034,6 +1082,7 @@ static void WriteFog(T& out)
SetUidField(fog.proj, bpmem.fog.c_proj_fsel.proj); SetUidField(fog.proj, bpmem.fog.c_proj_fsel.proj);
out.SetConstantsUsed(C_FOG, C_FOG+1);
if (bpmem.fog.c_proj_fsel.proj == 0) if (bpmem.fog.c_proj_fsel.proj == 0)
{ {
// perspective // perspective
@ -1053,6 +1102,7 @@ static void WriteFog(T& out)
SetUidField(fog.RangeBaseEnabled, bpmem.fogRange.Base.Enabled); SetUidField(fog.RangeBaseEnabled, bpmem.fogRange.Base.Enabled);
if(bpmem.fogRange.Base.Enabled) if(bpmem.fogRange.Base.Enabled)
{ {
out.SetConstantsUsed(C_FOG+2, C_FOG+2);
out.Write(" float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); out.Write(" float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n");
out.Write(" x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); out.Write(" x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n");
out.Write(" ze *= x_adjust;\n"); out.Write(" ze *= x_adjust;\n");
@ -1082,3 +1132,9 @@ void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode
{ {
GeneratePixelShader<PixelShaderCode, GO_ShaderCode>(object, dstAlphaMode, ApiType, components); GeneratePixelShader<PixelShaderCode, GO_ShaderCode>(object, dstAlphaMode, ApiType, components);
} }
// Fills `object` with the constant-usage profile of the pixel shader that would be
// generated for the given destination-alpha mode, API and vertex components.
// It runs the shared GeneratePixelShader template with a PixelShaderConstantProfile
// as the output sink, so the generator's SetConstantsUsed() calls are recorded in
// `object`.
// NOTE(review): the generator is instantiated with GO_ShaderCode rather than
// GO_ShaderConstantProfile - confirm this is intentional.
void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
{
    GeneratePixelShader<PixelShaderConstantProfile, GO_ShaderCode>(
        object,
        dstAlphaMode,
        ApiType,
        components);
}

View File

@ -72,7 +72,6 @@ struct pixel_shader_uid_data
u32 numtevstages : 4; u32 numtevstages : 4;
u32 numindstages : 3; u32 numindstages : 3;
} genMode; } genMode;
u32 fogc_proj_fselfsel : 3;
struct struct
{ {
u32 unknown : 1; u32 unknown : 1;
@ -177,11 +176,11 @@ struct pixel_shader_uid_data
typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid; typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
typedef ShaderCode<pixel_shader_uid_data> PixelShaderCode; typedef ShaderCode<pixel_shader_uid_data> PixelShaderCode;
//typedef ShaderConstantProfile<pixel_shader_uid_data> PixelShaderConstantProfile; typedef ShaderConstantProfile<pixel_shader_uid_data> PixelShaderConstantProfile;
void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
//void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
#endif // GCOGL_PIXELSHADER_H #endif // GCOGL_PIXELSHADER_H

View File

@ -83,39 +83,58 @@ void PixelShaderManager::Shutdown()
} }
void PixelShaderManager::SetConstants() void PixelShaderManager::SetConstants(u32 components)
{ {
for (int i = 0; i < 2; ++i) PixelShaderConstantProfile constant_profile(C_PENVCONST_END);
/// TODO: dst alpha/api/components type parameter...
GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components);
static int saved_updates = 0;
static int necessary_updates = 0;
#define IncStuff() { \
saved_updates++; \
printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates); }
for (int i = 0; i < 2; ++i)
{ {
if (s_nColorsChanged[i]) if (s_nColorsChanged[i])
{ {
int baseind = i ? C_KCOLORS : C_COLORS; int baseind = i ? C_KCOLORS : C_COLORS;
for (int j = 0; j < 4; ++j) for (int j = 0; j < 4; ++j)
{ {
if (s_nColorsChanged[i] & (1 << j)) if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j))
SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]); {
} SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]);
s_nColorsChanged[i] = 0; s_nColorsChanged[i] &= ~(1<<j);
} ++necessary_updates;
} } else if ((s_nColorsChanged[i] & (1 << j))) IncStuff();
}
}
}
if (s_nTexDimsChanged) if (s_nTexDimsChanged)
{ {
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
if (s_nTexDimsChanged & (1<<i)) if (s_nTexDimsChanged & (1<<i) && constant_profile.ConstantIsUsed(C_TEXDIMS+i))
{
++necessary_updates;
SetPSTextureDims(i); SetPSTextureDims(i);
s_nTexDimsChanged &= ~(1<<i);
}else if (s_nTexDimsChanged & (1<<i)) IncStuff();
} }
s_nTexDimsChanged = 0;
} }
if (s_bAlphaChanged) if (s_bAlphaChanged && constant_profile.ConstantIsUsed(C_ALPHA))
{ {
++necessary_updates;
SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f);
s_bAlphaChanged = false; s_bAlphaChanged = false;
} } else if (s_bAlphaChanged) IncStuff();
if (s_bZTextureTypeChanged) if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS))
{ {
float ftemp[4]; float ftemp[4];
switch (bpmem.ztex2.type) switch (bpmem.ztex2.type)
@ -133,11 +152,12 @@ void PixelShaderManager::SetConstants()
ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0; ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0;
break; break;
} }
++necessary_updates;
SetPSConstant4fv(C_ZBIAS, ftemp); SetPSConstant4fv(C_ZBIAS, ftemp);
s_bZTextureTypeChanged = false; s_bZTextureTypeChanged = false;
} } else if (s_bZTextureTypeChanged) IncStuff();
if (s_bZBiasChanged || s_bDepthRangeChanged) if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1))
{ {
// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)
// [0] = width/2 // [0] = width/2
@ -148,9 +168,10 @@ void PixelShaderManager::SetConstants()
// [5] = 16777215 * farz // [5] = 16777215 * farz
//ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias);
++necessary_updates;
SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f); SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f);
s_bZBiasChanged = s_bDepthRangeChanged = false; s_bZBiasChanged = s_bDepthRangeChanged = false;
} }else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff();
// indirect incoming texture scales // indirect incoming texture scales
if (s_nIndTexScaleChanged) if (s_nIndTexScaleChanged)
@ -158,7 +179,7 @@ void PixelShaderManager::SetConstants()
// set as two sets of vec4s, each containing S and T of two ind stages. // set as two sets of vec4s, each containing S and T of two ind stages.
float f[8]; float f[8];
if (s_nIndTexScaleChanged & 0x03) if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE))
{ {
for (u32 i = 0; i < 2; ++i) for (u32 i = 0; i < 2; ++i)
{ {
@ -166,26 +187,30 @@ void PixelShaderManager::SetConstants()
f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1); f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
} }
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE, f); SetPSConstant4fv(C_INDTEXSCALE, f);
s_nIndTexScaleChanged &= ~0x03;
} }
else if ((s_nIndTexScaleChanged & 0x03)) IncStuff();
if (s_nIndTexScaleChanged & 0x0c) { if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1)) {
for (u32 i = 2; i < 4; ++i) { for (u32 i = 2; i < 4; ++i) {
f[2 * i] = bpmem.texscale[1].getScaleS(i & 1); f[2 * i] = bpmem.texscale[1].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1); f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
} }
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]); SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]);
s_nIndTexScaleChanged &= ~0x0c;
} }
else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff();
s_nIndTexScaleChanged = 0;
} }
if (s_nIndTexMtxChanged) if (s_nIndTexMtxChanged)
{ {
for (int i = 0; i < 3; ++i) for (int i = 0; i < 3; ++i)
{ {
if (s_nIndTexMtxChanged & (1 << i)) if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1)))
{ {
int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) | int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
((u32)bpmem.indmtx[i].col1.s1 << 2) | ((u32)bpmem.indmtx[i].col1.s1 << 2) |
@ -195,6 +220,8 @@ void PixelShaderManager::SetConstants()
// xyz - static matrix // xyz - static matrix
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better // TODO w - dynamic matrix scale / 256...... somehow / 4 works better
// rev 2972 - now using / 256.... verify that this works // rev 2972 - now using / 256.... verify that this works
++necessary_updates;
++necessary_updates;
SetPSConstant4f(C_INDTEXMTX + 2 * i, SetPSConstant4f(C_INDTEXMTX + 2 * i,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col0.ma * fscale,
bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col1.mc * fscale,
@ -210,19 +237,22 @@ void PixelShaderManager::SetConstants()
i, 1024.0f*fscale, i, 1024.0f*fscale,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale,
bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale); bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);
}
s_nIndTexMtxChanged &= ~(1 << i);
}else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();}
} }
s_nIndTexMtxChanged = 0;
} }
if (s_bFogColorChanged) if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG))
{ {
++necessary_updates;
SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0); SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0);
s_bFogColorChanged = false; s_bFogColorChanged = false;
} }else if (s_bFogColorChanged) IncStuff();
if (s_bFogParamChanged) if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1))
{ {
++necessary_updates;
if(!g_ActiveConfig.bDisableFog) if(!g_ActiveConfig.bDisableFog)
{ {
//downscale magnitude to 0.24 bits //downscale magnitude to 0.24 bits
@ -235,10 +265,11 @@ void PixelShaderManager::SetConstants()
SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0); SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0);
s_bFogParamChanged = false; s_bFogParamChanged = false;
} }else if ( s_bFogParamChanged) IncStuff();
if (s_bFogRangeAdjustChanged) if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2))
{ {
++necessary_updates;
if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1) if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1)
{ {
//bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342; //bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342;
@ -257,8 +288,9 @@ void PixelShaderManager::SetConstants()
SetPSConstant4f(C_FOG + 2, 0.0f, 1.0f, 1.0f, 0.0f); // Need to update these values for older hardware that fails to divide by zero in shaders. SetPSConstant4f(C_FOG + 2, 0.0f, 1.0f, 1.0f, 0.0f); // Need to update these values for older hardware that fails to divide by zero in shaders.
s_bFogRangeAdjustChanged = false; s_bFogRangeAdjustChanged = false;
} }else if ( s_bFogRangeAdjustChanged) IncStuff();
// TODO: use constant profile here!
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f
{ {
if (nLightsChanged[0] >= 0) if (nLightsChanged[0] >= 0)
@ -353,7 +385,8 @@ void PixelShaderManager::SetPSTextureDims(int texid)
SetPSConstant4fv(C_TEXDIMS + texid, fdims); SetPSConstant4fv(C_TEXDIMS + texid, fdims);
} }
// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value // This one is high in profiles (0.5%).
// TODO: Move conversion out, only store the raw color value
// and update it when the shader constant is set, only. // and update it when the shader constant is set, only.
void PixelShaderManager::SetColorChanged(int type, int num, bool high) void PixelShaderManager::SetColorChanged(int type, int num, bool high)
{ {

View File

@ -34,7 +34,7 @@ public:
static void Shutdown(); static void Shutdown();
static void DoState(PointerWrap &p); static void DoState(PointerWrap &p);
static void SetConstants(); // sets pixel shader constants static void SetConstants(u32 components); // sets pixel shader constants
// constant management, should be called after memory is committed // constant management, should be called after memory is committed
static void SetColorChanged(int type, int index, bool high); static void SetColorChanged(int type, int index, bool high);

View File

@ -23,6 +23,8 @@
#include <string.h> #include <string.h>
#include "CommonTypes.h" #include "CommonTypes.h"
#include <vector>
template<class uid_data> template<class uid_data>
class ShaderUid class ShaderUid
{ {
@ -36,6 +38,7 @@ public:
void Write(const char* fmt, ...) {} void Write(const char* fmt, ...) {}
const char* GetBuffer() { return NULL; } const char* GetBuffer() { return NULL; }
void SetBuffer(char* buffer) { } void SetBuffer(char* buffer) { }
inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {}
bool operator == (const ShaderUid& obj) const bool operator == (const ShaderUid& obj) const
{ {
@ -55,7 +58,7 @@ public:
return false; return false;
} }
uid_data& GetUidData() { return data; } inline uid_data& GetUidData() { return data; }
private: private:
union union
@ -86,16 +89,46 @@ public:
const char* GetBuffer() { return buf; } const char* GetBuffer() { return buf; }
void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; }
uid_data& GetUidData() { return *(uid_data*)NULL; } uid_data& GetUidData() { return *(uid_data*)NULL; }
inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {}
private: private:
const char* buf; const char* buf;
char* write_ptr; char* write_ptr;
}; };
// Output sink for the shader generator templates that, instead of emitting shader
// text or a UID, records which shader constant indices the generated shader uses.
// The text/UID related members are no-ops that exist only so this class can be
// passed where the generator expects them.
template<class uid_data>
class ShaderConstantProfile
{
public:
    // Creates a profile tracking `num_constants` constant slots, all initially
    // marked unused. A negative count is treated as zero instead of being
    // converted to a huge unsigned size.
    ShaderConstantProfile(int num_constants)
        : constant_usage(num_constants > 0 ? static_cast<size_t>(num_constants) : 0, false)
    {
    }

    // No-op: shader text is discarded by this sink.
    void Write(const char* fmt, ...) {}
    // Always NULL: this sink owns no text buffer.
    const char* GetBuffer() { return NULL; }
    // No-op: this sink owns no text buffer.
    void SetBuffer(char* buffer) { }
    // Placeholder required by the generator interface. The returned reference is
    // formed from a null pointer and must never be read or written through.
    uid_data& GetUidData() { return *(uid_data*)NULL; }

    // Number of constant slots tracked by this profile.
    size_t NumConstants() const { return constant_usage.size(); }

    // Marks the constants in the inclusive range [first_index, last_index] as used.
    // Indices beyond the tracked slot count are ignored rather than written out of
    // bounds. The exclusive end is computed with unsigned wrap-around, so a caller
    // passing last_index == first_index - 1 (an empty range) marks nothing.
    // has room for optimization (if it matters at all...)
    inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
    {
        const size_t num_slots = constant_usage.size();
        const unsigned int end_index = last_index + 1;
        for (unsigned int i = first_index; i < end_index && i < num_slots; ++i)
            constant_usage[i] = true;
    }

    // Returns true if the constant at `index` was marked as used. Indices outside
    // the tracked range are reported as unused.
    inline bool ConstantIsUsed(unsigned int index) const
    {
        return index < constant_usage.size() && constant_usage[index];
    }
private:
    std::vector<bool> constant_usage; // TODO: Is vector<bool> appropriate here?
};
enum GenOutput enum GenOutput
{ {
GO_ShaderCode, GO_ShaderCode,
GO_ShaderUid, GO_ShaderUid,
GO_ShaderConstantProfile,
}; };
#endif // _SHADERGENCOMMON_H #endif // _SHADERGENCOMMON_H

View File

@ -180,7 +180,7 @@ void VertexManager::vFlush()
// set global constants // set global constants
VertexShaderManager::SetConstants(); VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants(); PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate
&& bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; && bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;