Merge branch 'new-shadercache-uids'

This branch vastly reduces stuttering caused by redundant shader compilations.
With this code Red Steel 2 has much less stuttering and is actually playable on my hardware now. Other games probably benefit in other ways as well, but don't expect all kinds of stuttering to be magically fixed now.

For reference: if you experience any regressions, set EnableShaderDebugging to True; it should then automatically tell you what's wrong.
This commit is contained in:
NeoBrainX 2011-09-30 17:46:34 +02:00
commit adef86c1ef
22 changed files with 634 additions and 421 deletions

View File

@ -21,12 +21,10 @@
#include "Common.h"
#include <fstream>
// Update this to the current SVN revision every time you change shader generation code.
// We don't automatically get this from SVN_REV because that would mean regenerating the
// shader cache for every revision, graphics-related or not, which is simply annoying.
// Increment this every time you change shader generation code.
enum
{
LINEAR_DISKCACHE_VER = 6964
LINEAR_DISKCACHE_VER = 6967
};
// On disk format:

View File

@ -86,7 +86,7 @@ bool MsgAlert(bool yes_no, int Style, const char* format, ...)
va_list args;
va_start(args, format);
CharArrayFromFormatV(buffer, 2047, str_translator(format).c_str(), args);
CharArrayFromFormatV(buffer, sizeof(buffer)-1, str_translator(format).c_str(), args);
va_end(args);
ERROR_LOG(MASTER_LOG, "%s: %s", caption.c_str(), buffer);

View File

@ -21,6 +21,21 @@
#define WRITE p+=sprintf
// Packs the lighting setup of every active color channel into one u32 per channel.
//
// For channel i, out[i] receives the color control word in its low bits and the
// alpha control word shifted left by 15. When lighting is enabled for a control,
// its full register value (hex) is stored; otherwise only its material source
// (matsource) is stored.
//
// out:     destination array; xfregs.numChan.numColorChans entries are written.
// returns: the number of entries written (xfregs.numChan.numColorChans).
int GetLightingShaderId(u32* out)
{
	// Check the channel count before touching out[], so that a violation is
	// reported before any out-of-range write happens rather than after it.
	_assert_(xfregs.numChan.numColorChans <= 2);

	for (int i = 0; i < xfregs.numChan.numColorChans; ++i)
	{
		out[i] = xfregs.color[i].enablelighting ?
			(u32)xfregs.color[i].hex :
			(u32)xfregs.color[i].matsource;
		out[i] |= (xfregs.alpha[i].enablelighting ?
			(u32)xfregs.alpha[i].hex :
			(u32)xfregs.alpha[i].matsource) << 15;
	}
	return xfregs.numChan.numColorChans;
}
// coloralpha - 1 if color, 2 if alpha
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha)
{

View File

@ -18,6 +18,9 @@
#ifndef _LIGHTINGSHADERGEN_H_
#define _LIGHTINGSHADERGEN_H_
#include "CommonTypes.h"
// Writes one packed u32 of lighting state per active color channel to out and
// returns the number of values written.
int GetLightingShaderId(u32* out);
// NOTE(review): definition not visible from this header; presumably appends generated
// lighting shader code at p and returns the advanced write pointer - TODO confirm.
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest);
#endif // _LIGHTINGSHADERGEN_H_

View File

@ -27,128 +27,264 @@
#include "VideoConfig.h"
#include "NativeVertexFormat.h"
PIXELSHADERUID last_pixel_shader_uid;
static int AlphaPreTest();
// ORs the state of one TEV stage into the four u32 words out[0..3].
// Every write uses |=, so the caller must pass zero-initialized words.
// State that the checks below find unused by this stage (rasterized color, texture
// sampling, konst selection, indirect stage) is left out of the words.
//
// stage: index of the TEV stage to encode.
// out:   pointer to four consecutive u32 words receiving the packed bits.
static void StageHash(int stage, u32* out)
{
// out[0], bits 0-23: color combiner register.
out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24
u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now
// Bits 4-7 of alphaC end up in bits 28-31 of out[0]; the remaining 16 bits go to out[1].
out[0] |= (alphaC&0xF0) << 24; // 8
out[1] |= alphaC >> 8; // 16
// reserve 3 bits for bpmem.tevorders[stage/2].getTexMap
out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3
out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1
// reserve 3 bits for bpmem.tevorders[stage/2].getColorChan
// An indirect stage only counts if it is active and refers to an enabled indirect stage.
bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages;
out[2] |= bHasIndStage << 2; // 1
bool needstexcoord = false;
if (bHasIndStage)
{
out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation
needstexcoord = true;
}
TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC;
// Rasterized color state is only encoded when some combiner input reads it.
if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
|| cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC
|| cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC
|| ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA
|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
// NOTE(review): rswap is ORed unshifted into the low bits of out[0], which already hold
// colorC.hex bits; this looks like it aliases distinct states - confirm the intended position.
out[0] |= bpmem.combiners[stage].alphaC.rswap;
// Swap table entries selected by rswap.
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2
// Color channel: lowest bit goes to out[1] bit 23, the two bits above it to out[2] bits 0-1.
out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23;
out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1;
}
out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1);
// Texture state is only encoded when texturing is enabled for this stage.
if (bpmem.tevorders[stage/2].getEnable(stage&1))
{
// Redundant: needstexcoord was already set above whenever bHasIndStage is true.
if (bHasIndStage) needstexcoord = true;
// NOTE(review): tswap is also ORed unshifted into the low bits of out[0] (same concern as rswap).
out[0] |= bpmem.combiners[stage].alphaC.tswap;
// Swap table entries selected by tswap.
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2
out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16;
}
// Konst selections are only encoded when some combiner input reads the konst register.
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST
|| ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5
out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5
}
if (needstexcoord)
{
// NOTE(review): this shares bit position 16 of out[1] with getTexMap above, and the texcoord
// is already stored unconditionally at bit 19 - confirm whether this write is intended.
out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16;
}
}
// Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes
// are set...
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
u32 numstages = bpmem.genMode.numtevstages + 1;
u32 projtexcoords = 0;
for (u32 i = 0; i < numstages; i++)
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] |= bpmem.genMode.numtevstages; // 4
uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
uid->values[0] |= dstAlphaMode << 8; // 2
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth;
uid->values[0] |= DepthTextureEnable << 10; // 1
bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
uid->values[0] |= enablePL << 11; // 1
if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4
u32 alphaPreTest = AlphaPreTest()+1;
uid->values[0] |= alphaPreTest << 16; // 2
if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS))
{
if (bpmem.tevorders[i/2].getEnable(i & 1))
{
int texcoord = bpmem.tevorders[i / 2].getTexCoord(i & 1);
if (xfregs.texMtxInfo[i].projection)
projtexcoords |= 1 << texcoord;
}
}
uid->values[0] = (u32)bpmem.genMode.numtevstages |
((u32)bpmem.genMode.numindstages << 4) |
((u32)bpmem.genMode.numtexgens << 7) |
((u32)dstAlphaMode << 11) |
((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 13) |
(projtexcoords << 21) |
((u32)bpmem.ztex2.op << 29);
// swap table
for (int i = 0; i < 8; i += 2)
((u8*)&uid->values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4);
u32 enableZTexture = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ? 1 : 0;
uid->values[2] = (u32)bpmem.fog.c_proj_fsel.fsel |
((u32)bpmem.fog.c_proj_fsel.proj << 3) |
((u32)enableZTexture << 4) | ((u32)bpmem.fogRange.Base.Enabled << 5);
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 2; ++i) {
uid->values[3 + i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[3 + i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
}
uid->values[4] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
int hdr = 5;
u32 *pcurvalue = &uid->values[hdr];
for (u32 i = 0; i < numstages; ++i)
{
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC;
u32 val0 = cc.hex & 0xffffff;
u32 val1 = ac.hex & 0xffffff;
val0 |= bpmem.tevksel[i / 2].getKC(i & 1) << 24;
val1 |= bpmem.tevksel[i / 2].getKA(i & 1) << 24;
pcurvalue[0] = val0;
pcurvalue[1] = val1;
pcurvalue += 2;
// Courtesy of PreAlphaTest, we're done already ;)
// NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though.
uid->num_values = 1;
return;
}
for (u32 i = 0; i < numstages / 2; ++i)
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
{
u32 val0, val1;
if (bpmem.tevorders[i].hex & 0x40)
val0 = bpmem.tevorders[i].hex & 0x3ff;
if (18+i < 32)
uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1
else
val0 = bpmem.tevorders[i].hex & 0x380;
if (bpmem.tevorders[i].hex & 0x40000)
val1 = (bpmem.tevorders[i].hex & 0x3ff000) >> 12;
else
val1 = (bpmem.tevorders[i].hex & 0x380000) >> 12;
switch (i % 3) {
case 0: pcurvalue[0] = val0|(val1<<10); break;
case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break;
case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages / 2: %08x", (i % 3));
}
uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1
}
if (numstages & 1) { // odd
u32 val0;
if (bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x40)
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x3ff;
else
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x380;
uid->values[1] = bpmem.genMode.numindstages << 2; // 3
u32 indirectStagesUsed = 0;
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
indirectStagesUsed |= (1 << bpmem.tevind[i].bt);
switch (bpmem.genMode.numtevstages % 3)
{
case 0: pcurvalue[0] = val0; break;
case 1: pcurvalue[0] |= val0 << 20; break;
case 2: pcurvalue[1] |= val0 << 10; pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages: %08x", bpmem.genMode.numtevstages % 3);
}
}
assert(indirectStagesUsed == (indirectStagesUsed & 0xF));
if ((bpmem.genMode.numtevstages % 3) != 2)
++pcurvalue;
uid->values[1] |= indirectStagesUsed << 5; // 4;
uid->tevstages = (u32)(pcurvalue - &uid->values[0] - hdr);
for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
{
u32 val = bpmem.tevind[i].hex & 0x1fffff; // 21 bits
switch (i % 3)
if (indirectStagesUsed & (1 << i))
{
case 0: pcurvalue[0] = val; break;
case 1: pcurvalue[0] |= val << 21; pcurvalue[1] = val >> 11; ++pcurvalue; break;
case 2: pcurvalue[0] |= val << 10; ++pcurvalue; break;
default: PanicAlert("Unknown case for Ind Stages: %08x", (i % 3));
uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (9 + 3*i); // 1
if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens)
uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (10 + 3*i); // 2
}
}
// yeah, well ....
uid->indstages = (u32)(pcurvalue - &uid->values[0] - (hdr - 1) - uid->tevstages);
u32* ptr = &uid->values[2];
for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i)
{
StageHash(i, ptr);
ptr += 4; // max: ptr = &uid->values[66]
}
ptr[0] |= bpmem.alphaFunc.comp0; // 3
ptr[0] |= bpmem.alphaFunc.comp1 << 3; // 3
ptr[0] |= bpmem.alphaFunc.logic << 6; // 2
if (alphaPreTest == 0 || alphaPreTest == 2)
{
ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 8; // 3
if (DepthTextureEnable)
{
ptr[0] |= bpmem.ztex2.op << 11; // 2
ptr[0] |= bpmem.zcontrol.zcomploc << 13; // 1
ptr[0] |= bpmem.zmode.testenable << 14; // 1
ptr[0] |= bpmem.zmode.updateenable << 15; // 1
}
}
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
if (bpmem.fog.c_proj_fsel.fsel != 0)
{
ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1
ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1
}
}
++ptr;
if (enablePL)
{
ptr += GetLightingShaderId(ptr);
*ptr++ = components;
}
uid->num_values = ptr - uid->values;
}
// Builds the "safe" pixel shader ID: an unoptimized dump of the registers and config
// flags read below, stored one u32 per slot without any bit packing.
//
// The buffer is zeroed first and then filled sequentially. The position of every
// section is determined by values stored ahead of it (the pixel lighting flag in
// slot 6 and the stage count inside genMode.hex in slot 1), so two different
// states cannot produce the same word sequence by shifting sections around.
//
// Previously the trailing section was written at the fixed index 113, which is only
// correct when pixel lighting is disabled. With pixel lighting enabled the five
// extra lighting words pushed the data of the 16th TEV stage to indices 113-117,
// where it was then overwritten. The closing assertion also compared the write
// position (116) against GetNumValues(), which is the full array size for the safe
// ID, so it could never hold.
//
// Worst case used here: 8 + 5 + 8 + 16 + 1 + 16*4 + 3 = 105 words.
//
// uid:          receives the ID; all of uid->values is overwritten.
// dstAlphaMode: destination alpha mode the shader is generated for.
// components:   vertex components; only stored when pixel lighting is enabled.
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
	memset(uid->values, 0, sizeof(uid->values));
	u32* ptr = uid->values;
	const bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;

	// Fixed header, slots 0-7.
	*ptr++ = dstAlphaMode; // 0
	*ptr++ = bpmem.genMode.hex; // 1
	*ptr++ = bpmem.ztex2.hex; // 2
	*ptr++ = bpmem.zcontrol.hex; // 3
	*ptr++ = bpmem.zmode.hex; // 4
	*ptr++ = g_ActiveConfig.bEnablePerPixelDepth; // 5
	*ptr++ = enablePL; // 6
	*ptr++ = xfregs.numTexGen.hex; // 7

	// Lighting registers only influence the pixel shader when per-pixel lighting is on.
	if (enablePL)
	{
		*ptr++ = xfregs.color[0].hex;
		*ptr++ = xfregs.alpha[0].hex;
		*ptr++ = xfregs.color[1].hex;
		*ptr++ = xfregs.alpha[1].hex;
		*ptr++ = components;
	}

	for (unsigned int i = 0; i < 8; ++i)
		*ptr++ = xfregs.texMtxInfo[i].hex;

	for (unsigned int i = 0; i < 16; ++i)
		*ptr++ = bpmem.tevind[i].hex;

	*ptr++ = bpmem.tevindref.hex;

	// Per-stage registers. tevind[i] is not repeated here: all 16 entries were
	// already stored above, and leaving out the duplicate keeps the worst case
	// inside the buffer.
	for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times
	{
		*ptr++ = bpmem.combiners[i].colorC.hex;
		*ptr++ = bpmem.combiners[i].alphaC.hex;
		*ptr++ = bpmem.tevksel[i/2].hex;
		*ptr++ = bpmem.tevorders[i/2].hex;
	}

	// Trailing section, written right after the last stage.
	*ptr++ = bpmem.alphaFunc.hex;
	*ptr++ = bpmem.fog.c_proj_fsel.hex;
	*ptr++ = bpmem.fogRange.Base.hex;

	// Unused slots stay zero; we only need to be sure we never ran past the array.
	_assert_((ptr - uid->values) <= uid->GetNumValues());
}
// Debug aid, active only when g_ActiveConfig.bEnableShaderDebugging is set.
//
// Recomputes the safe pixel shader ID for the current state and compares it with
// old_id. If the safe IDs differ, the shader code is regenerated and compared with
// old_code; only when the code differs as well is a report written to the dump
// directory and a panic alert raised.
//
// api:          graphics API the shader code is generated for.
// old_id:       previously recorded safe ID (presumably taken when the cached
//               shader was created - confirm against the callers).
// old_code:     shader source recorded together with old_id.
// dstAlphaMode: destination alpha mode of the current draw.
// components:   vertex components of the current draw.
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components)
{
	if (!g_ActiveConfig.bEnableShaderDebugging)
		return;

	PIXELSHADERUIDSAFE current_id;
	GetSafePixelShaderId(&current_id, dstAlphaMode, components);
	if (old_id == current_id)
		return;

	// The safe IDs differ; that is only a problem if the generated code differs too.
	std::string current_code(GeneratePixelShaderCode(dstAlphaMode, api, components));
	if (old_code == current_code)
		return;

	_assert_(old_id.GetNumValues() == current_id.GetNumValues());

	// Side-by-side hex dump of both IDs, two values per line.
	char report[8192];
	char* cursor = report;
	cursor += sprintf(cursor, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");

	const int count = current_id.GetNumValues();
	int idx = 0;
	for (; idx + 1 < count; idx += 2)
	{
		cursor += sprintf(cursor, "%02d, %08X %08X | %08X %08X\n", idx,
			old_id.values[idx], old_id.values[idx+1],
			current_id.values[idx], current_id.values[idx+1]);
	}
	// An odd count leaves a single trailing value.
	if (idx < count)
		cursor += sprintf(cursor, "%02d, %08X | %08X\n", idx, old_id.values[idx], current_id.values[idx]);

	// Each mismatch gets its own numbered file.
	static int mismatch_count = 0;
	char path[MAX_PATH];
	sprintf(path, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), mismatch_count++);

	std::ofstream dump(path);
	dump << report;
	dump << "\n\nOld shader code:\n" << old_code;
	dump << "\n\nNew shader code:\n" << current_code;
	dump.close();

	PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", path);
}
// old tev->pixelshader notes
@ -165,7 +301,6 @@ static void SampleTexture(char *&p, const char *destination, const char *texcoor
// static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode);
static void WriteFog(char *&p);
static int AlphaPreTest();
static const char *tevKSelTableC[] = // KCSEL
{
@ -333,12 +468,6 @@ static const char *tevRasTable[] =
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
};
static const char *alphaRef[2] =
{
I_ALPHA"[0].r",
I_ALPHA"[0].g"
};
//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
@ -353,23 +482,14 @@ static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
#define WRITE p+=sprintf
static const char *swapColors = "rgba";
static char swapModeTable[4][5];
static char text[16384];
static bool DepthTextureEnable;
struct RegisterState
{
bool ColorNeedOverflowControl;
bool AlphaNeedOverflowControl;
bool AuxStored;
};
static RegisterState RegisterStates[4];
static void BuildSwapModeTable()
{
static const char *swapColors = "rgba";
for (int i = 0; i < 4; i++)
{
swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
@ -385,14 +505,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
BuildSwapModeTable();
BuildSwapModeTable(); // Needed for WriteStage
int numStages = bpmem.genMode.numtevstages + 1;
int numTexgen = bpmem.genMode.numtexgens;
char *p = text;
WRITE(p, "//Pixel Shader for TEV stages\n");
WRITE(p, "//%i TEV stages, %i texgens, %i IND stages\n",
numStages, numTexgen, bpmem.genMode.numindstages);
WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n",
numStages, numTexgen/*, bpmem.genMode.numindstages*/);
int nIndirectStagesUsed = 0;
if (bpmem.genMode.numindstages > 0)
@ -505,10 +625,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
// alpha test will always fail, so restart the shader and just make it an empty function
WRITE(p, "ocol0 = 0;\n");
if(DepthTextureEnable)
WRITE(p, "depth = 1.f;\n");
if(dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "ocol1 = 0;\n");
WRITE(p, "discard;\n");
if(ApiType != API_D3D11)
WRITE(p, "return;\n");
@ -594,16 +710,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
}
}
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
RegisterStates[0].AuxStored = false;
for(int i = 1; i < 4; i++)
{
RegisterStates[i].AlphaNeedOverflowControl = true;
RegisterStates[i].ColorNeedOverflowControl = true;
RegisterStates[i].AuxStored = false;
}
for (int i = 0; i < numStages; i++)
WriteStage(p, i, ApiType); //build the equation for this stage
@ -611,23 +717,13 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
// The results of the last texenv stage are put onto the screen,
// regardless of the used destination register
if(bpmem.combiners[numStages - 1].colorC.dest != 0)
{
bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored;
WRITE(p, "prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl;
}
if(bpmem.combiners[numStages - 1].alphaC.dest != 0)
{
bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored;
WRITE(p, "prev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl;
}
WRITE(p, "prev.rgb = %s;\n",tevCOutputTable[bpmem.combiners[numStages-1].colorC.dest]);
WRITE(p, "prev.a = %s;\n",tevAOutputTable[bpmem.combiners[numStages-1].alphaC.dest]);
}
// emulation of unisgned 8 overflow when casting if needed
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
WRITE(p, "prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
// emulation of unsigned 8 overflow when casting
WRITE(p, "prev = frac(4.0f + prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
// TODO: Why are we doing a second alpha pretest here?
if (!WriteAlphaTest(p, ApiType, dstAlphaMode))
{
// alpha test will always fail, so restart the shader and just make it an empty function
@ -742,10 +838,6 @@ static const char *TEVCMPAlphaOPTable[16] =
static void WriteStage(char *&p, int n, API_TYPE ApiType)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages;
@ -754,8 +846,11 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (!bHasTexCoord)
texcoord = 0;
WRITE(p, "// TEV stage %d\n", n);
if (bHasIndStage)
{
WRITE(p, "// indirect op\n");
// perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
if (bpmem.tevind[n].bs != ITBA_OFF)
{
@ -782,11 +877,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix
_assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix
_assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
}
@ -825,11 +922,15 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
bool bCRas = cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC;
bool bARas = ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA;
if(bCRas || bARas)
// blah1
if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
|| cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC
|| cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC
|| ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA
|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n");
}
@ -837,7 +938,6 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (bpmem.tevorders[n/2].getEnable(n&1))
{
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
if(!bHasIndStage)
{
// calc tevcord
@ -847,20 +947,20 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n");
}
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType);
}
else
WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
bool bCKonst = cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST;
bool bAKonst = ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST;
if (bCKonst || bAKonst )
// blah2
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST
|| ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
if(kc > 7 || ka > 7)
{
@ -872,100 +972,35 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
}
}
if(cc.a == TEVCOLORARG_CPREV
|| cc.a == TEVCOLORARG_APREV
|| cc.b == TEVCOLORARG_CPREV
|| cc.b == TEVCOLORARG_APREV
|| cc.c == TEVCOLORARG_CPREV
|| cc.c == TEVCOLORARG_APREV
|| ac.a == TEVALPHAARG_APREV
|| ac.b == TEVALPHAARG_APREV
|| ac.c == TEVALPHAARG_APREV)
{
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
{
WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cprev = prev;\n");
}
RegisterStates[0].AuxStored = true;
}
if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV
|| cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV
|| cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV
|| ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || ac.c == TEVALPHAARG_APREV)
WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(cc.a == TEVCOLORARG_C0
|| cc.a == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_C0
|| cc.b == TEVCOLORARG_A0
|| cc.c == TEVCOLORARG_C0
|| cc.c == TEVCOLORARG_A0
|| ac.a == TEVALPHAARG_A0
|| ac.b == TEVALPHAARG_A0
|| ac.c == TEVALPHAARG_A0)
{
if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
{
WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[1].AlphaNeedOverflowControl = false;
RegisterStates[1].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc0 = c0;\n");
}
RegisterStates[1].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C1
|| cc.a == TEVCOLORARG_A1
|| cc.b == TEVCOLORARG_C1
|| cc.b == TEVCOLORARG_A1
|| cc.c == TEVCOLORARG_C1
|| cc.c == TEVCOLORARG_A1
|| ac.a == TEVALPHAARG_A1
|| ac.b == TEVALPHAARG_A1
|| ac.c == TEVALPHAARG_A1)
{
if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
{
WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[2].AlphaNeedOverflowControl = false;
RegisterStates[2].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc1 = c1;\n");
}
RegisterStates[2].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0
|| cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0
|| ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0)
WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(cc.a == TEVCOLORARG_C2
|| cc.a == TEVCOLORARG_A2
|| cc.b == TEVCOLORARG_C2
|| cc.b == TEVCOLORARG_A2
|| cc.c == TEVCOLORARG_C2
|| cc.c == TEVCOLORARG_A2
|| ac.a == TEVALPHAARG_A2
|| ac.b == TEVALPHAARG_A2
|| ac.c == TEVALPHAARG_A2)
{
if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
{
if(cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1
|| cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1
|| cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1
|| ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1)
WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
if(cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2
|| cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2
|| cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2
|| ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2)
WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[3].AlphaNeedOverflowControl = false;
RegisterStates[3].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc2 = c2;\n");
}
RegisterStates[3].AuxStored = true;
}
RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
RegisterStates[cc.dest].AuxStored = false;
WRITE(p, "// color combine\n");
if (cc.clamp)
WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]);
else
@ -996,7 +1031,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, "%s", tevBiasTable[cc.bias]);
if (cc.shift > 0)
if (cc.shift > TEVSCALE_1)
WRITE(p, ")");
}
else
@ -1012,8 +1047,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, ")");
WRITE(p,";\n");
RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0);
RegisterStates[ac.dest].AuxStored = false;
WRITE(p, "// alpha combine\n");
// combine the alpha channel
if (ac.clamp)
WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]);
@ -1059,6 +1093,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (ac.clamp)
WRITE(p, ")");
WRITE(p, ";\n\n");
WRITE(p, "// TEV done\n");
}
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
@ -1124,6 +1159,11 @@ static int AlphaPreTest()
static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode)
{
static const char *alphaRef[2] =
{
I_ALPHA"[0].r",
I_ALPHA"[0].g"
};
int Pretest = AlphaPreTest();
if(Pretest >= 0)
@ -1141,7 +1181,10 @@ static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode
compindex = bpmem.alphaFunc.comp1 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
WRITE(p, ")){ocol0 = 0;%s%sdiscard;%s}\n",dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "",DepthTextureEnable ? "depth = 1.f;" : "",(ApiType != API_D3D11)? "return;" : "");
WRITE(p, ")){ocol0 = 0;%s%s discard;%s}\n",
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "",
DepthTextureEnable ? "depth = 1.f;" : "",
(ApiType != API_D3D11) ? "return;" : "");
return true;
}
@ -1199,4 +1242,4 @@ static void WriteFog(char *&p)
WRITE(p, " prev.rgb = lerp(prev.rgb,"I_FOG"[0].rgb,fog);\n");
}
}

View File

@ -44,44 +44,42 @@
#define C_PLIGHTS (C_FOG + 3)
#define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11 + 2)
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 120
// DO NOT make anything in this class virtual.
class PIXELSHADERUID
template<bool safe>
class _PIXELSHADERUID
{
public:
u32 values[PIXELSHADERUID_MAX_VALUES];
u16 tevstages, indstages;
u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
int num_values;
PIXELSHADERUID()
_PIXELSHADERUID()
{
memset(values, 0, PIXELSHADERUID_MAX_VALUES * 4);
tevstages = indstages = 0;
}
PIXELSHADERUID(const PIXELSHADERUID& r)
_PIXELSHADERUID(const _PIXELSHADERUID& r)
{
tevstages = r.tevstages;
indstages = r.indstages;
int N = GetNumValues();
_assert_(N <= PIXELSHADERUID_MAX_VALUES);
for (int i = 0; i < N; ++i)
values[i] = r.values[i];
num_values = r.num_values;
if (safe) memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE);
else memcpy(values, r.values, r.GetNumValues() * sizeof(values[0]));
}
int GetNumValues() const
{
return tevstages + indstages + 4;
if (safe) return (sizeof(values) / sizeof(u32));
else return num_values;
}
bool operator <(const PIXELSHADERUID& _Right) const
bool operator <(const _PIXELSHADERUID& _Right) const
{
if (values[0] < _Right.values[0])
return true;
else if (values[0] > _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
if (N < _Right.GetNumValues())
return true;
else if (N > _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{
if (values[i] < _Right.values[i])
return true;
@ -91,12 +89,12 @@ public:
return false;
}
bool operator ==(const PIXELSHADERUID& _Right) const
bool operator ==(const _PIXELSHADERUID& _Right) const
{
if (values[0] != _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
if (N != _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{
if (values[i] != _Right.values[i])
return false;
@ -104,6 +102,8 @@ public:
return true;
}
};
typedef _PIXELSHADERUID<false> PIXELSHADERUID;
typedef _PIXELSHADERUID<true> PIXELSHADERUIDSAFE;
// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
@ -114,8 +114,11 @@ enum DSTALPHA_MODE
};
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode);
extern PIXELSHADERUID last_pixel_shader_uid;
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components);
#endif // GCOGL_PIXELSHADER_H

View File

@ -26,25 +26,19 @@
#include "VertexShaderGen.h"
#include "VideoConfig.h"
VERTEXSHADERUID last_vertex_shader_uid;
// Mash together all the inputs that contribute to the code of a generated vertex shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
{
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] = components |
(xfregs.numTexGen.numTexGens << 23) |
(xfregs.numChan.numColorChans << 27) |
(xfregs.dualTexTrans.enabled << 29);
for (int i = 0; i < 2; ++i) {
uid->values[1+i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[1+i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
// TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here?
GetLightingShaderId(&uid->values[1]);
uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
u32 *pcurvalue = &uid->values[3];
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) {
@ -69,6 +63,69 @@ void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
}
}
// Fills |uid| with the "safe" vertex shader identifier: each register used is
// stored as a full 32-bit word, without caring whether all of its bits are
// actually needed. |components| is stored as the first value.
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components)
{
	memset(uid->values, 0, sizeof(uid->values));

	int idx = 0;

	// Vertex components followed by the global XF state registers.
	uid->values[idx++] = components;
	uid->values[idx++] = xfregs.numTexGen.hex;
	uid->values[idx++] = xfregs.numChan.hex;
	uid->values[idx++] = xfregs.dualTexTrans.hex;

	// Color and alpha control registers of both channels, interleaved per channel.
	for (int chan = 0; chan < 2; ++chan)
	{
		uid->values[idx++] = xfregs.color[chan].hex;
		uid->values[idx++] = xfregs.alpha[chan].hex;
	}

	// Per-pixel lighting only counts when it is both requested and supported by the backend.
	const bool pixel_lighting = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
	uid->values[idx++] = pixel_lighting;

	// All eight texgen slots are stored, regardless of how many are enabled.
	for (unsigned int gen = 0; gen < 8; ++gen)
	{
		uid->values[idx++] = xfregs.texMtxInfo[gen].hex;
		uid->values[idx++] = xfregs.postMtxInfo[gen].hex;
	}

	// The number of values written must equal what the UID reports as its size.
	_assert_(idx == uid->GetNumValues());
}
// Debugging aid that checks the optimized vertex shader UIDs against the "safe" ones.
//
// Does nothing unless g_ActiveConfig.bEnableShaderDebugging is set. Otherwise the safe
// UID is recomputed for the current state and compared with |old_id|. If the two differ,
// the vertex shader is regenerated for |api| and compared with |old_code|. When the code
// differs as well, both UIDs and both shader sources are dumped to a numbered
// "vsuid_mismatch_NNNN.txt" file in the dump directory and a panic alert is raised.
//
// api        - API the shader code is generated for.
// old_id     - safe UID stored alongside the cached shader.
// old_code   - shader source stored alongside the cached shader.
// components - vertex components, passed on to UID and shader code generation.
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components)
{
	if (!g_ActiveConfig.bEnableShaderDebugging)
		return;

	VERTEXSHADERUIDSAFE new_id;
	GetSafeVertexShaderId(&new_id, components);

	// Identical safe UIDs: nothing to validate.
	if (old_id == new_id)
		return;

	// The safe UIDs differ; that is only a problem if the generated code differs too.
	std::string new_code(GenerateVertexShaderCode(components, api));
	if (old_code == new_code)
		return;

	_assert_(old_id.GetNumValues() == new_id.GetNumValues());

	// Build a table of the UID words, two words per row (old pair | new pair).
	char msg[8192];
	char* ptr = msg;
	ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
	const int N = new_id.GetNumValues();
	for (int i = 0; i < N/2; ++i)
		ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
												new_id.values[2*i], new_id.values[2*i+1]);
	// An odd number of words leaves one trailing word for a final, shorter row.
	if (N % 2)
		ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);

	// Each failure goes to its own numbered file so earlier dumps are not overwritten.
	static int num_failures = 0;
	char szTemp[MAX_PATH];
	sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
	std::ofstream file(szTemp);
	file << msg;
	file << "\n\nOld shader code:\n" << old_code;
	file << "\n\nNew shader code:\n" << new_code;
	file.close();

	// This validates *vertex* shader IDs; the message previously said "pixel shader".
	PanicAlert("Unique vertex shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp);
}
static char text[16384];
#define WRITE p+=sprintf
@ -244,7 +301,8 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
else
WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// TODO: This probably isn't necessary if pixel lighting is enabled.
p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
if(xfregs.numChan.numColorChans < 2)

View File

@ -48,17 +48,18 @@
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 4)
class VERTEXSHADERUID
template<bool safe>
class _VERTEXSHADERUID
{
#define NUM_VSUID_VALUES_SAFE 25
public:
u32 values[9];
u32 values[safe ? NUM_VSUID_VALUES_SAFE : 9];
VERTEXSHADERUID()
_VERTEXSHADERUID()
{
memset(values, 0, sizeof(values));
}
VERTEXSHADERUID(const VERTEXSHADERUID& r)
_VERTEXSHADERUID(const _VERTEXSHADERUID& r)
{
for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
values[i] = r.values[i];
@ -66,10 +67,11 @@ public:
int GetNumValues() const
{
return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
if (safe) return NUM_VSUID_VALUES_SAFE;
else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
}
bool operator <(const VERTEXSHADERUID& _Right) const
bool operator <(const _VERTEXSHADERUID& _Right) const
{
if (values[0] < _Right.values[0])
return true;
@ -86,7 +88,7 @@ public:
return false;
}
bool operator ==(const VERTEXSHADERUID& _Right) const
bool operator ==(const _VERTEXSHADERUID& _Right) const
{
if (values[0] != _Right.values[0])
return false;
@ -99,14 +101,18 @@ public:
return true;
}
};
typedef _VERTEXSHADERUID<false> VERTEXSHADERUID;
typedef _VERTEXSHADERUID<true> VERTEXSHADERUIDSAFE;
// components is included in the uid.
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type);
const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type);
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
extern VERTEXSHADERUID last_vertex_shader_uid;
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components);
// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components);
#endif // GCOGL_VERTEXSHADER_H

View File

@ -96,6 +96,8 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "EnableOpenCL", &bEnableOpenCL, false);
iniFile.Get("Settings", "OMPDecoder", &bOMPDecoder, false);
iniFile.Get("Settings", "EnableShaderDebugging", &bEnableShaderDebugging, false);
iniFile.Get("Enhancements", "ForceFiltering", &bForceFiltering, 0);
iniFile.Get("Enhancements", "MaxAnisotropy", &iMaxAnisotropy, 0); // NOTE - this is x in (1 << x)
iniFile.Get("Enhancements", "PostProcessingShader", &sPostProcessingShader, "");
@ -231,6 +233,8 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "EnableOpenCL", bEnableOpenCL);
iniFile.Set("Settings", "OMPDecoder", bOMPDecoder);
iniFile.Set("Settings", "EnableShaderDebugging", bEnableShaderDebugging);
iniFile.Set("Enhancements", "ForceFiltering", bForceFiltering);
iniFile.Set("Enhancements", "MaxAnisotropy", iMaxAnisotropy);
iniFile.Set("Enhancements", "PostProcessingShader", sPostProcessingShader);

View File

@ -147,6 +147,9 @@ struct VideoConfig
// D3D only config, mostly to be merged into the above
int iAdapter;
// Debugging
bool bEnableShaderDebugging;
// Static config per API
// TODO: Move this out of VideoConfig
struct

View File

@ -41,6 +41,7 @@ namespace DX11
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
@ -412,6 +413,11 @@ void PixelShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
PixelShaderCacheInserter inserter;
g_ps_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
last_entry = NULL;
}
// ONLY to be used during shutdown.
@ -420,6 +426,8 @@ void PixelShaderCache::Clear()
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
last_entry = NULL;
}
// Used in Swap() when AA mode has changed
@ -454,28 +462,31 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode);
GetPixelShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount)
if (last_entry)
{
PSCache::const_iterator iter = PixelShaders.find(uid);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
return (iter != PixelShaders.end() && iter->second.shader);
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return (last_entry->shader != NULL);
}
}
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID));
last_uid = uid;
// Check if the shader is already in the cache
PSCache::iterator iter;
iter = PixelShaders.find(uid);
if (iter != PixelShaders.end())
{
iter->second.frameCount = frameCount;
const PSCacheEntry &entry = iter->second;
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL);
}
@ -491,12 +502,18 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
// Insert the bytecode into the caches
g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
g_ps_disk_cache.Sync();
bool result = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
pbytecode->Release();
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return result;
return success;
}
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen)
@ -511,7 +528,6 @@ bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* byt
// Make an entry in the table
PSCacheEntry newentry;
newentry.shader = shader;
newentry.frameCount = frameCount;
PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid];

View File

@ -17,11 +17,12 @@
#pragma once
#include <map>
#include "PixelShaderGen.h"
#include <d3d11.h>
class PIXELSHADERUID;
#include <map>
enum DSTALPHA_MODE;
namespace DX11
@ -52,9 +53,11 @@ private:
struct PSCacheEntry
{
ID3D11PixelShader* shader;
int frameCount;
PSCacheEntry() : shader(NULL), frameCount(0) {}
PIXELSHADERUIDSAFE safe_uid;
std::string code;
PSCacheEntry() : shader(NULL) {}
void Destroy() { SAFE_RELEASE(shader); }
};
@ -62,6 +65,7 @@ private:
static PSCache PixelShaders;
static const PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid;
};
} // namespace DX11

View File

@ -37,6 +37,7 @@ namespace DX11 {
VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
static ID3D11VertexShader* SimpleVertexShader = NULL;
static ID3D11VertexShader* ClearVertexShader = NULL;
@ -174,6 +175,11 @@ void VertexShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
VertexShaderCacheInserter inserter;
g_vs_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
last_entry = NULL;
}
void VertexShaderCache::Clear()
@ -181,6 +187,8 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy();
vshaders.clear();
last_entry = NULL;
}
void VertexShaderCache::Shutdown()
@ -202,22 +210,26 @@ bool VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount)
if (last_entry)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return (vshaders[uid].shader != NULL);
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
}
memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID));
last_uid = uid;
VSCache::iterator iter = vshaders.find(uid);
if (iter != vshaders.end())
{
iter->second.frameCount = frameCount;
const VSCacheEntry &entry = iter->second;
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components);
return (entry.shader != NULL);
}
@ -232,12 +244,18 @@ bool VertexShaderCache::SetShader(u32 components)
return false;
}
g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
g_vs_disk_cache.Sync();
bool result = InsertByteCode(uid, pbytecode);
bool success = InsertByteCode(uid, pbytecode);
pbytecode->Release();
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return result;
return success;
}
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob)
@ -252,7 +270,6 @@ bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcod
// Make an entry in the table
VSCacheEntry entry;
entry.shader = shader;
entry.frameCount = frameCount;
entry.SetByteCode(bcodeblob);
vshaders[uid] = entry;

View File

@ -18,12 +18,12 @@
#ifndef _VERTEXSHADERCACHE_H
#define _VERTEXSHADERCACHE_H
#include <map>
#include "VertexShaderGen.h"
#include "D3DBase.h"
#include "D3DBlob.h"
class VERTEXSHADERUID;
#include <map>
namespace DX11 {
@ -51,9 +51,11 @@ private:
{
ID3D11VertexShader* shader;
D3DBlob* bytecode; // needed to initialize the input layout
int frameCount;
VSCacheEntry() : shader(NULL), bytecode(NULL), frameCount(0) {}
VERTEXSHADERUIDSAFE safe_uid;
std::string code;
VSCacheEntry() : shader(NULL), bytecode(NULL) {}
void SetByteCode(D3DBlob* blob)
{
SAFE_RELEASE(bytecode);
@ -70,6 +72,7 @@ private:
static VSCache vshaders;
static const VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid;
};
} // namespace DX11

View File

@ -43,6 +43,7 @@ namespace DX9
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders;
@ -233,6 +234,8 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
void PixelShaderCache::Init()
{
last_entry = NULL;
//program used for clear screen
{
char pprog[3072];
@ -283,6 +286,9 @@ void PixelShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
PixelShaderCacheInserter inserter;
g_ps_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
}
// ONLY to be used during shutdown.
@ -292,7 +298,7 @@ void PixelShaderCache::Clear()
iter->second.Destroy();
PixelShaders.clear();
memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid));
last_entry = NULL;
}
void PixelShaderCache::Shutdown()
@ -326,41 +332,47 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30;
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode);
GetPixelShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount)
if (last_entry)
{
PSCache::const_iterator iter = PixelShaders.find(uid);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return (iter != PixelShaders.end() && iter->second.shader);
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return last_entry->shader != NULL;
}
}
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID));
last_uid = uid;
// Check if the shader is already in the cache
PSCache::iterator iter;
iter = PixelShaders.find(uid);
if (iter != PixelShaders.end())
{
iter->second.frameCount = frameCount;
const PSCacheEntry &entry = iter->second;
last_entry = &entry;
if (entry.shader) D3D::SetPixelShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL);
}
// Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30, components);
const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components);
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
unique_shaders.insert(code_hash);
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size());
if (g_ActiveConfig.bEnableShaderDebugging)
{
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
unique_shaders.insert(code_hash);
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size());
}
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
@ -381,14 +393,19 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
// Insert the bytecode into the caches
g_ps_disk_cache.Append(uid, bytecode, bytecodelen);
g_ps_disk_cache.Sync();
// And insert it into the shader cache.
bool result = InsertByteCode(uid, bytecode, bytecodelen, true);
bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
delete [] bytecode;
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return result;
return success;
}
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate)
@ -398,7 +415,6 @@ bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytec
// Make an entry in the table
PSCacheEntry newentry;
newentry.shader = shader;
newentry.frameCount = frameCount;
PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid];

View File

@ -40,9 +40,11 @@ private:
{
LPDIRECT3DPIXELSHADER9 shader;
bool owns_shader;
int frameCount;
PSCacheEntry() : shader(NULL), owns_shader(true), frameCount(0) {}
PIXELSHADERUIDSAFE safe_uid;
std::string code;
PSCacheEntry() : shader(NULL), owns_shader(true) {}
void Destroy()
{
if (shader && owns_shader)
@ -55,6 +57,7 @@ private:
static PSCache PixelShaders;
static const PSCacheEntry *last_entry;
static PIXELSHADERUID last_uid;
static void Clear();
public:

View File

@ -38,6 +38,7 @@ namespace DX9
VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
#define MAX_SSAA_SHADERS 3
@ -151,6 +152,11 @@ void VertexShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
VertexShaderCacheInserter inserter;
g_vs_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
last_entry = NULL;
}
void VertexShaderCache::Clear()
@ -159,7 +165,7 @@ void VertexShaderCache::Clear()
iter->second.Destroy();
vshaders.clear();
memset(&last_vertex_shader_uid, 0xFF, sizeof(last_vertex_shader_uid));
last_entry = NULL;
}
void VertexShaderCache::Shutdown()
@ -184,23 +190,27 @@ bool VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount)
if (last_entry)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return (vshaders[uid].shader != NULL);
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
}
memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID));
last_uid = uid;
VSCache::iterator iter = vshaders.find(uid);
if (iter != vshaders.end())
{
iter->second.frameCount = frameCount;
const VSCacheEntry &entry = iter->second;
last_entry = &entry;
if (entry.shader) D3D::SetVertexShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components);
return (entry.shader != NULL);
}
@ -213,12 +223,16 @@ bool VertexShaderCache::SetShader(u32 components)
return false;
}
g_vs_disk_cache.Append(uid, bytecode, bytecodelen);
g_vs_disk_cache.Sync();
bool result = InsertByteCode(uid, bytecode, bytecodelen, true);
bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
}
delete [] bytecode;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return result;
return success;
}
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
@ -227,7 +241,6 @@ bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *byt
// Make an entry in the table
VSCacheEntry entry;
entry.shader = shader;
entry.frameCount = frameCount;
vshaders[uid] = entry;
last_entry = &vshaders[uid];

View File

@ -34,11 +34,11 @@ private:
struct VSCacheEntry
{
LPDIRECT3DVERTEXSHADER9 shader;
int frameCount;
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string code;
#endif
VSCacheEntry() : shader(NULL), frameCount(0) {}
VERTEXSHADERUIDSAFE safe_uid;
VSCacheEntry() : shader(NULL) {}
void Destroy()
{
if (shader)
@ -51,6 +51,7 @@ private:
static VSCache vshaders;
static const VSCacheEntry *last_entry;
static VERTEXSHADERUID last_uid;
static void Clear();
public:

View File

@ -44,7 +44,8 @@ bool PixelShaderCache::s_displayCompileAlert;
GLuint PixelShaderCache::CurrentShader;
bool PixelShaderCache::ShaderEnabled;
static FRAGMENTSHADER* pShaderLast = NULL;
PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry = NULL;
PIXELSHADERUID PixelShaderCache::last_uid;
GLuint PixelShaderCache::GetDepthMatrixProgram()
{
@ -61,10 +62,9 @@ void PixelShaderCache::Init()
glEnable(GL_FRAGMENT_PROGRAM_ARB);
ShaderEnabled = true;
CurrentShader = 0;
last_entry = NULL;
GL_REPORT_ERRORD();
memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid));
s_displayCompileAlert = true;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, (GLint *)&s_nMaxPixelInstructions);
@ -184,38 +184,43 @@ void PixelShaderCache::Shutdown()
FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode);
GetPixelShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount)
if (last_entry)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return pShaderLast;
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(API_OPENGL, last_entry->safe_uid, last_entry->shader.strprog, dstAlphaMode, components);
return &last_entry->shader;
}
}
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID));
last_uid = uid;
PSCache::iterator iter = PixelShaders.find(uid);
if (iter != PixelShaders.end())
{
iter->second.frameCount = frameCount;
PSCacheEntry &entry = iter->second;
if (&entry.shader != pShaderLast)
{
pShaderLast = &entry.shader;
}
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return pShaderLast;
ValidatePixelShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, dstAlphaMode, components);
return &last_entry->shader;
}
// Make an entry in the table
PSCacheEntry& newentry = PixelShaders[uid];
newentry.frameCount = frameCount;
pShaderLast = &newentry.shader;
last_entry = &newentry;
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);
if (g_ActiveConfig.bEnableShaderDebugging && code)
{
GetSafePixelShaderId(&newentry.safe_uid, dstAlphaMode, components);
newentry.shader.strprog = code;
}
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
static int counter = 0;
@ -234,7 +239,7 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp
INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return pShaderLast;
return &last_entry->shader;
}
bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram)
@ -318,9 +323,6 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
cgDestroyProgram(tempprog);
#endif
#if defined(_DEBUG) || defined(DEBUGFAST)
ps.strprog = pstrprogram;
#endif
return true;
}

View File

@ -39,9 +39,7 @@ struct FRAGMENTSHADER
}
}
GLuint glprogid; // opengl program id
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string strprog;
#endif
};
class PixelShaderCache
@ -49,13 +47,13 @@ class PixelShaderCache
struct PSCacheEntry
{
FRAGMENTSHADER shader;
int frameCount;
PSCacheEntry() : frameCount(0) {}
PSCacheEntry() {}
~PSCacheEntry() {}
void Destroy()
{
shader.Destroy();
}
PIXELSHADERUIDSAFE safe_uid;
};
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
@ -67,6 +65,8 @@ class PixelShaderCache
static bool s_displayCompileAlert;
static GLuint CurrentShader;
static PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid;
static bool ShaderEnabled;

View File

@ -41,7 +41,9 @@ VertexShaderCache::VSCache VertexShaderCache::vshaders;
GLuint VertexShaderCache::CurrentShader;
bool VertexShaderCache::ShaderEnabled;
static VERTEXSHADER *pShaderLast = NULL;
VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry = NULL;
VERTEXSHADERUID VertexShaderCache::last_uid;
static int s_nMaxVertexInstructions;
@ -50,7 +52,7 @@ void VertexShaderCache::Init()
glEnable(GL_VERTEX_PROGRAM_ARB);
ShaderEnabled = true;
CurrentShader = 0;
memset(&last_vertex_shader_uid, 0xFF, sizeof(last_vertex_shader_uid));
last_entry = NULL;
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions);
if (strstr((const char*)glGetString(GL_VENDOR), "Humper") != NULL) s_nMaxVertexInstructions = 4096;
@ -74,31 +76,34 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount)
if (last_entry)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return pShaderLast;
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_OPENGL, vshaders[uid].safe_uid, vshaders[uid].shader.strprog, components);
return &last_entry->shader;
}
}
memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID));
last_uid = uid;
VSCache::iterator iter = vshaders.find(uid);
if (iter != vshaders.end())
{
iter->second.frameCount = frameCount;
VSCacheEntry &entry = iter->second;
if (&entry.shader != pShaderLast) {
pShaderLast = &entry.shader;
}
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return pShaderLast;
ValidateVertexShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, components);
return &last_entry->shader;
}
// Make an entry in the table
VSCacheEntry& entry = vshaders[uid];
entry.frameCount = frameCount;
pShaderLast = &entry.shader;
last_entry = &entry;
const char *code = GenerateVertexShaderCode(components, API_OPENGL);
GetSafeVertexShaderId(&entry.safe_uid, components);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
@ -118,7 +123,7 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components)
INCSTAT(stats.numVertexShadersCreated);
SETSTAT(stats.numVertexShadersAlive, vshaders.size());
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return pShaderLast;
return &last_entry->shader;
}
bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram)
@ -182,9 +187,8 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr
cgDestroyProgram(tempprog);
#endif
#if defined(_DEBUG) || defined(DEBUGFAST)
vs.strprog = pstrprogram;
#endif
if (g_ActiveConfig.bEnableShaderDebugging)
vs.strprog = pstrprogram;
return true;
}

View File

@ -32,9 +32,7 @@ struct VERTEXSHADER
VERTEXSHADER() : glprogid(0) {}
GLuint glprogid; // opengl program id
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string strprog;
#endif
};
class VertexShaderCache
@ -42,8 +40,8 @@ class VertexShaderCache
struct VSCacheEntry
{
VERTEXSHADER shader;
int frameCount;
VSCacheEntry() : frameCount(0) {}
VERTEXSHADERUIDSAFE safe_uid;
VSCacheEntry() {}
void Destroy() {
// printf("Destroying vs %i\n", shader.glprogid);
glDeleteProgramsARB(1, &shader.glprogid);
@ -55,6 +53,9 @@ class VertexShaderCache
static VSCache vshaders;
static VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid;
static GLuint CurrentShader;
static bool ShaderEnabled;