From ca0e292dd4553d078393d626444c2b88922f5b62 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 6 Aug 2012 22:41:30 +0200 Subject: [PATCH 01/54] Replace the shader uid system with a new one which quasi-automatically generates uids for shaders. Currently used in the vertex shader only (had to fork lighting shaders for now). --- Source/Core/VideoCommon/Src/Debugger.cpp | 2 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 609 +++++++++++------- Source/Core/VideoCommon/Src/VertexShaderGen.h | 124 ++-- .../Plugin_VideoOGL/Src/VertexShaderCache.cpp | 17 +- .../Plugin_VideoOGL/Src/VertexShaderCache.h | 6 +- 5 files changed, 464 insertions(+), 294 deletions(-) diff --git a/Source/Core/VideoCommon/Src/Debugger.cpp b/Source/Core/VideoCommon/Src/Debugger.cpp index 4aa25b72be..005f81c8e6 100644 --- a/Source/Core/VideoCommon/Src/Debugger.cpp +++ b/Source/Core/VideoCommon/Src/Debugger.cpp @@ -131,7 +131,7 @@ void GFXDebuggerBase::DumpVertexShader(const char* path) sprintf(filename, "%sdump_vs.txt", path); File::CreateEmptyFile(filename); - File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename); +/// File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename); } void GFXDebuggerBase::DumpPixelShaderConstants(const char* path) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index cad117c2c8..d7134b6dcb 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -26,149 +26,260 @@ #include "VertexShaderGen.h" #include "VideoConfig.h" -// Mash together all the inputs that contribute to the code of a generated vertex shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... 
-void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) +static char text[16768]; + +enum GenOutput { - memset(uid->values, 0, sizeof(uid->values)); - uid->values[0] = components | - (xfregs.numTexGen.numTexGens << 23) | - (xfregs.numChan.numColorChans << 27) | - (xfregs.dualTexTrans.enabled << 29); - - // TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here? - GetLightingShaderId(&uid->values[1]); - - uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31; - u32 *pcurvalue = &uid->values[3]; - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - TexMtxInfo tinfo = xfregs.texMtxInfo[i]; - if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) - tinfo.hex &= 0x7ff; - if (tinfo.texgentype != XF_TEXGEN_REGULAR) - tinfo.projection = 0; - - u32 val = ((tinfo.hex >> 1) & 0x1ffff); - if (xfregs.dualTexTrans.enabled && tinfo.texgentype == XF_TEXGEN_REGULAR) { - // rewrite normalization and post index - val |= ((u32)xfregs.postMtxInfo[i].index << 17) | ((u32)xfregs.postMtxInfo[i].normalize << 23); - } - - switch (i & 3) { - case 0: pcurvalue[0] |= val; break; - case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break; - case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break; - case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break; - } - } -} - -void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components) + GO_ShaderCode, + GO_ShaderUid, +}; +// TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? +template +void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) { - // Just store all used registers here without caring whether we need all bits or less. 
- memset(uid->values, 0, sizeof(uid->values)); - u32* ptr = uid->values; - *ptr++ = components; - *ptr++ = xfregs.numTexGen.hex; - *ptr++ = xfregs.numChan.hex; - *ptr++ = xfregs.dualTexTrans.hex; + object.Write("struct VS_OUTPUT {\n"); + object.Write(" float4 pos : POSITION;\n"); + object.Write(" float4 colors_0 : COLOR0;\n"); + object.Write(" float4 colors_1 : COLOR1;\n"); - for (int i = 0; i < 2; ++i) { - *ptr++ = xfregs.color[i].hex; - *ptr++ = xfregs.alpha[i].hex; - } - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - for (unsigned int i = 0; i < 8; ++i) { - *ptr++ = xfregs.texMtxInfo[i].hex; - *ptr++ = xfregs.postMtxInfo[i].hex; - } - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - - -void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - VERTEXSHADERUIDSAFE new_id; - GetSafeVertexShaderId(&new_id, components); - - if (!(old_id == new_id)) + if (xfregs.numTexGen.numTexGens < 7) { - std::string new_code(GenerateVertexShaderCode(components, api)); - if (old_code != new_code) + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + object.Write(" float3 tex%d : TEXCOORD%d;\n", i, i); + + object.Write(" float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); +/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) +/// object.Write(" float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); + } + else + { + // clip position is in w of first 4 texcoords +/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) +/// { +/// for (int i = 0; i < 8; ++i) +/// object.Write(" float4 tex%d : TEXCOORD%d;\n", i, i); +/// } +/// else { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Vertex shader IDs matched but 
unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file(szTemp); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + object.Write(" float%d tex%d : TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, i); } } + object.Write("};\n"); } - -static char text[16384]; - -#define WRITE p+=sprintf - -char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type) +template +void _GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) { - WRITE(p, "struct VS_OUTPUT {\n"); - WRITE(p, " float4 pos : POSITION;\n"); - WRITE(p, " float4 colors_0 : COLOR0;\n"); - WRITE(p, " float4 colors_1 : COLOR1;\n"); +#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; + const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; + const char* swizzle = "xyzw"; + if (coloralpha == 1 ) swizzle = "xyz"; + else if (coloralpha == 2 ) swizzle = "w"; - if (xfregs.numTexGen.numTexGens < 7) { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " float3 tex%d : TEXCOORD%d;\n", i, i); - WRITE(p, " float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, " float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); - } else { - // clip position is in w of first 4 texcoords - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); + SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); + if (!(chan.attnfunc & 1)) { + // atten disabled + switch (chan.diffusefunc) { + case LIGHTDIF_NONE: + object.Write("lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); + object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0f," :"(", lightsName, index, swizzle); + break; + default: _assert_(0); + } + } + else { // spec and spot + + if (chan.attnfunc == 3) + { // spot + object.Write("ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); + object.Write("dist2 = dot(ldir, ldir);\n" + "dist = sqrt(dist2);\n" + "ldir = ldir / dist;\n" + "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); + object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); + } + else if (chan.attnfunc == 1) + { // specular + object.Write("ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); + object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); + object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); + } + + switch (chan.diffusefunc) { - for (int i = 0; i < 8; ++i) - WRITE(p, " float4 tex%d : TEXCOORD%d;\n", i, i); + case LIGHTDIF_NONE: + object.Write("lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", + swizzle, + chan.diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0f," :"(", + lightsName, + index, + swizzle); + break; + default: _assert_(0); + } + } + object.Write("\n"); +} + +// vertex shader +// lights/colors +// materials name is I_MATERIALS in vs and I_PMATERIALS in ps +// inColorName is color in vs and colors_ in ps +// dest is o.colors_ in vs and colors_ in ps +template +void _GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +{ + for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) + { + const LitChannel& color = xfregs.color[j]; + const LitChannel& alpha = xfregs.alpha[j]; + + object.Write("{\n"); + + SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); + if (color.matsource) {// from vertex + if (components & (VB_HAS_COL0 << j)) + object.Write("mat = %s%d;\n", inColorName, j); + else if (components & VB_HAS_COL0) + object.Write("mat = %s0;\n", inColorName); + else + object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + } + else // from color + object.Write("mat = %s.C%d;\n", materialsName, j+2); + + SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); + if (color.enablelighting) { + SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); + if (color.ambsource) { // from vertex + if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); + } + } + } + } + + // no shared lights + for (int i = 0; i < 8; ++i) + { + if (!(mask&(1<(object, i, j, lightsName, 1); + if (!(mask&(1<(object, i, j+2, lightsName, 2); + } + } + else if (color.enablelighting || alpha.enablelighting) + { + // lights are disabled on one channel so process only the active ones + const LitChannel& workingchannel = color.enablelighting ? color : alpha; + const int lit_index = color.enablelighting ? j : (j+2); + int coloralpha = color.enablelighting ? 
1 : 2; + + SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); + for (int i = 0; i < 8; ++i) + { + if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); + } + } + object.Write("%s%d = mat * saturate(lacc);\n", dest, j); + object.Write("}\n"); + } } -const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type) +// TODO: Problem: this one uses copy constructors or sth for uids when returning... +template +void GenerateShader(T& out, u32 components, API_TYPE api_type) { - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - text[sizeof(text) - 1] = 0x7C; // canary +#undef SetUidField +#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; + + if (type == GO_ShaderCode) + { + out.SetBuffer(text); + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + } + + /// text[sizeof(text) - 1] = 0x7C; // canary - _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); - _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); - bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); u32 lightMask = 0; if (xfregs.numChan.numColorChans > 0) @@ -176,141 +287,146 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type) if (xfregs.numChan.numColorChans > 1) lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); - char *p = text; - WRITE(p, "//Vertex Shader: comp:%x, \n", components); - WRITE(p, "typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_" I_POSNORMALMATRIX";\n" - "typedef struct { float4 t; } FLT4;\n" - "typedef struct { FLT4 T[24]; } s_" I_TEXMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_" I_TRANSFORMMATRICES";\n" - "typedef struct { FLT4 T[32]; } s_" I_NORMALMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_" I_POSTTRANSFORMMATRICES";\n" - "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" - "typedef struct { 
Light lights[8]; } s_" I_LIGHTS";\n" - "typedef struct { float4 C0, C1, C2, C3; } s_" I_MATERIALS";\n" - "typedef struct { float4 T0, T1, T2, T3; } s_" I_PROJECTION";\n" - ); + out.Write("//Vertex Shader: comp:%x, \n", components); + out.Write("typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_" I_POSNORMALMATRIX";\n" + "typedef struct { float4 t; } FLT4;\n" + "typedef struct { FLT4 T[24]; } s_" I_TEXMATRICES";\n" + "typedef struct { FLT4 T[64]; } s_" I_TRANSFORMMATRICES";\n" + "typedef struct { FLT4 T[32]; } s_" I_NORMALMATRICES";\n" + "typedef struct { FLT4 T[64]; } s_" I_POSTTRANSFORMMATRICES";\n" + "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" + "typedef struct { Light lights[8]; } s_" I_LIGHTS";\n" + "typedef struct { float4 C0, C1, C2, C3; } s_" I_MATERIALS";\n" + "typedef struct { float4 T0, T1, T2, T3; } s_" I_PROJECTION";\n" + ); - p = GenerateVSOutputStruct(p, components, api_type); +/// p = GenerateVSOutputStruct(p, components, api_type); + GenerateVSOutputStruct(out, components, api_type); // uniforms - WRITE(p, "uniform s_" I_TRANSFORMMATRICES" " I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); - WRITE(p, "uniform s_" I_TEXMATRICES" " I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); // also using tex matrices - WRITE(p, "uniform s_" I_NORMALMATRICES" " I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); - WRITE(p, "uniform s_" I_POSNORMALMATRIX" " I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); - WRITE(p, "uniform s_" I_POSTTRANSFORMMATRICES" " I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); - WRITE(p, "uniform s_" I_LIGHTS" " I_LIGHTS" : register(c%d);\n", C_LIGHTS); - WRITE(p, "uniform s_" I_MATERIALS" " I_MATERIALS" : register(c%d);\n", C_MATERIALS); - WRITE(p, "uniform s_" I_PROJECTION" " I_PROJECTION" : register(c%d);\n", C_PROJECTION); - WRITE(p, "uniform float4 " I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); + 
out.Write("uniform s_" I_TRANSFORMMATRICES" " I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); + out.Write("uniform s_" I_TEXMATRICES" " I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); + out.Write("uniform s_" I_NORMALMATRICES" " I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); + out.Write("uniform s_" I_POSNORMALMATRIX" " I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); + out.Write("uniform s_" I_POSTTRANSFORMMATRICES" " I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); + out.Write("uniform s_" I_LIGHTS" " I_LIGHTS" : register(c%d);\n", C_LIGHTS); + out.Write("uniform s_" I_MATERIALS" " I_MATERIALS" : register(c%d);\n", C_MATERIALS); + out.Write("uniform s_" I_PROJECTION" " I_PROJECTION" : register(c%d);\n", C_PROJECTION); + out.Write("uniform float4 " I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); - WRITE(p, "VS_OUTPUT main(\n"); - + out.Write("VS_OUTPUT main(\n"); + + SetUidField(numTexGens, xfregs.numTexGen.numTexGens); + SetUidField(components, components); // inputs if (components & VB_HAS_NRM0) - WRITE(p, " float3 rawnorm0 : NORMAL0,\n"); - if (components & VB_HAS_NRM1) { + out.Write(" float3 rawnorm0 : NORMAL0,\n"); + if (components & VB_HAS_NRM1) + { if (is_d3d) - WRITE(p, " float3 rawnorm1 : NORMAL1,\n"); + out.Write(" float3 rawnorm1 : NORMAL1,\n"); else - WRITE(p, " float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); + out.Write(" float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); } - if (components & VB_HAS_NRM2) { + if (components & VB_HAS_NRM2) + { if (is_d3d) - WRITE(p, " float3 rawnorm2 : NORMAL2,\n"); + out.Write(" float3 rawnorm2 : NORMAL2,\n"); else - WRITE(p, " float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); + out.Write(" float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); } if (components & VB_HAS_COL0) - WRITE(p, " float4 color0 : COLOR0,\n"); + out.Write(" float4 color0 : COLOR0,\n"); if (components & VB_HAS_COL1) - WRITE(p, " float4 color1 : COLOR1,\n"); + out.Write(" 
float4 color1 : COLOR1,\n"); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); - WRITE(p, "float3 N0 = " I_NORMALMATRICES".T[normidx].t.xyz, N1 = " I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = " I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); + out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n"); + out.Write("float3 N0 = " I_NORMALMATRICES".T[normidx].t.xyz, N1 = " I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = " I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); } if (components & VB_HAS_NRM0) - WRITE(p, "float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); + out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); if (components & VB_HAS_NRM1) - WRITE(p, "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); + out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); if (components & VB_HAS_NRM2) - WRITE(p, "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); + out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); } else { - WRITE(p, "float4 pos = float4(dot(" I_POSNORMALMATRIX".T0, rawpos), dot(" I_POSNORMALMATRIX".T1, rawpos), dot(" I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); + out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX".T0, rawpos), dot(" I_POSNORMALMATRIX".T1, rawpos), dot(" I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); if (components & VB_HAS_NRM0) - WRITE(p, "float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); + out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); if (components & VB_HAS_NRM1) - WRITE(p, 
"float3 _norm1 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); + out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); if (components & VB_HAS_NRM2) - WRITE(p, "float3 _norm2 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); + out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); } if (!(components & VB_HAS_NRM0)) - WRITE(p, "float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); + out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); - - WRITE(p, "o.pos = float4(dot(" I_PROJECTION".T0, pos), dot(" I_PROJECTION".T1, pos), dot(" I_PROJECTION".T2, pos), dot(" I_PROJECTION".T3, pos));\n"); - WRITE(p, "float4 mat, lacc;\n" - "float3 ldir, h;\n" - "float dist, dist2, attn;\n"); + out.Write("o.pos = float4(dot(" I_PROJECTION".T0, pos), dot(" I_PROJECTION".T1, pos), dot(" I_PROJECTION".T2, pos), dot(" I_PROJECTION".T3, pos));\n"); + out.Write("float4 mat, lacc;\n" + "float3 ldir, h;\n" + "float dist, dist2, attn;\n"); + + SetUidField(numColorChans, xfregs.numChan.numColorChans); if(xfregs.numChan.numColorChans == 0) { if (components & VB_HAS_COL0) - WRITE(p, "o.colors_0 = color0;\n"); + out.Write("o.colors_0 = color0;\n"); else - WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); } // TODO: This probably isn't necessary if pixel lighting is enabled. 
- p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + _GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); if(xfregs.numChan.numColorChans < 2) { if (components & VB_HAS_COL1) - WRITE(p, "o.colors_1 = color1;\n"); + out.Write("o.colors_1 = color1;\n"); else - WRITE(p, "o.colors_1 = o.colors_0;\n"); + out.Write("o.colors_1 = o.colors_0;\n"); } // special case if only pos and tex coord 0 and tex coord input is AB11 // donko - this has caused problems in some games. removed for now. @@ -322,21 +438,22 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type) */ // transform texcoords - WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; - WRITE(p, "{\n"); - WRITE(p, "coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + out.Write("{\n"); + out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + SetUidField(texMtxInfo[i].sourcerow, xfregs.texMtxInfo[i].sourcerow); switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = rawpos;\n"); // pos.w is 1 + out.Write("coord = rawpos;\n"); // pos.w is 1 break; case XF_SRCNORMAL_INROW: if (components & VB_HAS_NRM0) { _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n"); + out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n"); } break; case XF_SRCCOLORS_INROW: @@ -345,72 +462,79 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type) case XF_SRCBINORMAL_T_INROW: if (components & VB_HAS_NRM1) { _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n"); + out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n"); } break; case XF_SRCBINORMAL_B_INROW: if (components & VB_HAS_NRM2) { _assert_( 
texinfo.inputform == XF_TEXINPUT_ABC1 ); - WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n"); + out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n"); } break; default: _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) - WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); + out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); break; } // first transformation + SetUidField(texMtxInfo[i].texgentype, xfregs.texMtxInfo[i].texgentype); switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { // transform the light dir into tangent space - WRITE(p, "ldir = normalize(" I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); - WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); + SetUidField(texMtxInfo[i].embosslightshift, xfregs.texMtxInfo[i].embosslightshift); + SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); + out.Write("ldir = normalize(" I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); + out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); } else { _assert_(0); // should have normals - WRITE(p, "o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); + SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); + out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } break; case XF_TEXGEN_COLOR_STRGBC0: _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - WRITE(p, "o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); + out.Write("o.tex%d.xyz = float3(o.colors_0.x, 
o.colors_0.y, 1);\n", i); break; case XF_TEXGEN_COLOR_STRGBC1: _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - WRITE(p, "o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); + out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); break; case XF_TEXGEN_REGULAR: default: + SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); if (components & (VB_HAS_TEXMTXIDX0<(object, components, api_type); +} + +void GenerateShaderCode(ShaderCode& object, u32 components, API_TYPE api_type) +{ + return GenerateShader(object, components, api_type); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index cb253a9b6c..11bb5c29f6 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -18,6 +18,7 @@ #ifndef GCOGL_VERTEXSHADER_H #define GCOGL_VERTEXSHADER_H +#include #include "XFMemory.h" #include "VideoCommon.h" @@ -48,71 +49,106 @@ #define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) #define C_VENVCONST_END (C_DEPTHPARAMS + 4) -template -class _VERTEXSHADERUID +// TODO: Need packing? +struct uid_data +{ + u32 components; + u32 numColorChans : 2; + u32 numTexGens : 4; + + struct { + u32 projection : 1; // XF_TEXPROJ_X + u32 inputform : 2; // XF_TEXINPUT_X + u32 texgentype : 3; // XF_TEXGEN_X + u32 sourcerow : 5; // XF_SRCGEOM_X + u32 embosssourceshift : 3; // what generated texcoord to use + u32 embosslightshift : 3; // light index that is used + } texMtxInfo[8]; + struct { + u32 index : 6; // base row of dual transform matrix + u32 normalize : 1; // normalize before send operation + } postMtxInfo[8]; + struct { + u32 enabled : 1; + } dualTexTrans; + struct { + u32 matsource : 1; + u32 enablelighting : 1; + u32 ambsource : 1; + u32 diffusefunc : 2; + u32 attnfunc : 2; + u32 light_mask : 8; + } lit_chans[4]; +}; + + +class ShaderUid { -#define NUM_VSUID_VALUES_SAFE 25 public: - u32 values[safe ? 
NUM_VSUID_VALUES_SAFE : 9]; - - _VERTEXSHADERUID() + ShaderUid() { + memset(values, 0, sizeof(values)); } - _VERTEXSHADERUID(const _VERTEXSHADERUID& r) + void Write(const char* fmt, ...) {} + const char* GetBuffer() { return NULL; } + void SetBuffer(char* buffer) { } + + bool operator == (const ShaderUid& obj) const { - for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) - values[i] = r.values[i]; + return memcmp(this->values, obj.values, sizeof(values)) == 0; } - int GetNumValues() const + // TODO: Store last frame used and order by that? makes much more sense anyway... + bool operator < (const ShaderUid& obj) const { - if (safe) return NUM_VSUID_VALUES_SAFE; - else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1 - } - - bool operator <(const _VERTEXSHADERUID& _Right) const - { - if (values[0] < _Right.values[0]) - return true; - else if (values[0] > _Right.values[0]) - return false; - int N = GetNumValues(); - for (int i = 1; i < N; ++i) + for (int i = 0; i < 24; ++i) { - if (values[i] < _Right.values[i]) + if (this->values[i] < obj.values[i]) return true; - else if (values[i] > _Right.values[i]) + else if (this->values[i] > obj.values[i]) return false; } return false; } - bool operator ==(const _VERTEXSHADERUID& _Right) const + uid_data& GetUidData() { return data; } + +private: + union { - if (values[0] != _Right.values[0]) - return false; - int N = GetNumValues(); - for (int i = 1; i < N; ++i) - { - if (values[i] != _Right.values[i]) - return false; - } - return true; - } + uid_data data; + u32 values[24]; // TODO: Length? + }; }; -typedef _VERTEXSHADERUID VERTEXSHADERUID; -typedef _VERTEXSHADERUID VERTEXSHADERUIDSAFE; +class ShaderCode +{ +public: + ShaderCode() : buf(NULL), write_ptr(NULL) + { -// components is included in the uid. 
-char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type); -const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type); + } -void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components); -void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components); + void Write(const char* fmt, ...) + { + va_list arglist; + va_start(arglist, fmt); + write_ptr += vsprintf(write_ptr, fmt, arglist); + va_end(arglist); + } + + const char* GetBuffer() { return buf; } + void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } + uid_data& GetUidData() { return *(uid_data*)NULL; } + +private: + const char* buf; + char* write_ptr; +}; + +void GenerateShaderUid(ShaderUid& object, u32 components, API_TYPE api_type); +void GenerateShaderCode(ShaderCode& object, u32 components, API_TYPE api_type); -// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes -void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components); #endif // GCOGL_VERTEXSHADER_H diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index dbbb7ee29b..7eaf1546da 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -42,7 +42,7 @@ GLuint VertexShaderCache::CurrentShader; bool VertexShaderCache::ShaderEnabled; VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry = NULL; -VERTEXSHADERUID VertexShaderCache::last_uid; +ShaderUid VertexShaderCache::last_uid; static int s_nMaxVertexInstructions; @@ -74,14 +74,14 @@ void VertexShaderCache::Shutdown() VERTEXSHADER* VertexShaderCache::SetShader(u32 components) { - VERTEXSHADERUID uid; - GetVertexShaderId(&uid, components); + // Possible optimization: Don't always generate the shader uid, but keep track of changes in BPStructs instead + ShaderUid uid; + GenerateShaderUid(uid, 
components, API_OPENGL); if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_OPENGL, vshaders[uid].safe_uid, vshaders[uid].shader.strprog, components); return &last_entry->shader; } } @@ -95,15 +95,14 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components) last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, components); return &last_entry->shader; } // Make an entry in the table VSCacheEntry& entry = vshaders[uid]; last_entry = &entry; - const char *code = GenerateVertexShaderCode(components, API_OPENGL); - GetSafeVertexShaderId(&entry.safe_uid, components); + ShaderCode code; + GenerateShaderCode(code, components, API_OPENGL); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { @@ -111,11 +110,11 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components) char szTemp[MAX_PATH]; sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, code); + SaveData(szTemp, code.GetBuffer()); } #endif - if (!code || !VertexShaderCache::CompileVertexShader(entry.shader, code)) { + if (!code.GetBuffer() || !VertexShaderCache::CompileVertexShader(entry.shader, code.GetBuffer())) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return NULL; } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h index 6f4cbe25c2..f3831b3f7b 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h @@ -40,7 +40,7 @@ class VertexShaderCache struct VSCacheEntry { VERTEXSHADER shader; - VERTEXSHADERUIDSAFE safe_uid; + ShaderUid safe_uid; VSCacheEntry() {} void Destroy() { // printf("Destroying vs %i\n", shader.glprogid); @@ -49,12 +49,12 @@ class VertexShaderCache } }; - typedef std::map VSCache; + 
typedef std::map VSCache; static VSCache vshaders; static VSCacheEntry* last_entry; - static VERTEXSHADERUID last_uid; + static ShaderUid last_uid; // TODO: Use reference instead.. static GLuint CurrentShader; static bool ShaderEnabled; From 3c8df842bba1067ce8f7f41dd1c13cb4f047fc90 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 6 Aug 2012 23:09:43 +0200 Subject: [PATCH 02/54] Moved some of the new shader uid stuff to a common header file. --- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 94 +++ .../Core/VideoCommon/Src/VertexShaderGen.cpp | 10 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp~ | 645 ++++++++++++++++++ Source/Core/VideoCommon/Src/VertexShaderGen.h | 72 +- .../Core/VideoCommon/Src/VertexShaderGen.h~ | 92 +++ .../Plugin_VideoOGL/Src/VertexShaderCache.cpp | 10 +- .../Src/VertexShaderCache.cpp~ | 271 ++++++++ .../Plugin_VideoOGL/Src/VertexShaderCache.h | 6 +- .../Plugin_VideoOGL/Src/VertexShaderCache.h~ | 76 +++ 9 files changed, 1196 insertions(+), 80 deletions(-) create mode 100644 Source/Core/VideoCommon/Src/ShaderGenCommon.h create mode 100644 Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ create mode 100644 Source/Core/VideoCommon/Src/VertexShaderGen.h~ create mode 100644 Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp~ create mode 100644 Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h~ diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h new file mode 100644 index 0000000000..28fb8e8956 --- /dev/null +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -0,0 +1,94 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _SHADERGENCOMMON_H +#define _SHADERGENCOMMON_H + +#include +#include +#include +#include "CommonTypes.h" + +template +class ShaderUid +{ +public: + ShaderUid() + { + memset(values, 0, sizeof(values)); + } + + void Write(const char* fmt, ...) {} + const char* GetBuffer() { return NULL; } + void SetBuffer(char* buffer) { } + + bool operator == (const ShaderUid& obj) const + { + return memcmp(this->values, obj.values, sizeof(values)) == 0; + } + + // TODO: Store last frame used and order by that? makes much more sense anyway... + bool operator < (const ShaderUid& obj) const + { + for (int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) + { + if (this->values[i] < obj.values[i]) + return true; + else if (this->values[i] > obj.values[i]) + return false; + } + return false; + } + + uid_data& GetUidData() { return data; } + +private: + union + { + uid_data data; + u32 values[sizeof(uid_data) / sizeof(u32)]; + }; +}; + +// Needs to be a template for hacks... +template +class ShaderCode +{ +public: + ShaderCode() : buf(NULL), write_ptr(NULL) + { + + } + + void Write(const char* fmt, ...) 
+ { + va_list arglist; + va_start(arglist, fmt); + write_ptr += vsprintf(write_ptr, fmt, arglist); + va_end(arglist); + } + + const char* GetBuffer() { return buf; } + void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } + uid_data& GetUidData() { return *(uid_data*)NULL; } + +private: + const char* buf; + char* write_ptr; +}; + +#endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index d7134b6dcb..acc89c6a54 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -267,7 +267,7 @@ void _GenerateLightingShader(T& object, int components, const char* materialsNam // TODO: Problem: this one uses copy constructors or sth for uids when returning... template -void GenerateShader(T& out, u32 components, API_TYPE api_type) +void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { #undef SetUidField #define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; @@ -635,12 +635,12 @@ void GenerateShader(T& out, u32 components, API_TYPE api_type) setlocale(LC_NUMERIC, ""); // restore locale } -void GenerateShaderUid(ShaderUid& object, u32 components, API_TYPE api_type) +void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type) { - return GenerateShader(object, components, api_type); + GenerateVertexShader(object, components, api_type); } -void GenerateShaderCode(ShaderCode& object, u32 components, API_TYPE api_type) +void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) { - return GenerateShader(object, components, api_type); + GenerateVertexShader(object, components, api_type); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ new file mode 100644 index 0000000000..be57103918 --- /dev/null +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ 
@@ -0,0 +1,645 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include +#include + +#include "NativeVertexFormat.h" + +#include "BPMemory.h" +#include "CPMemory.h" +#include "LightingShaderGen.h" +#include "VertexShaderGen.h" +#include "VideoConfig.h" + +static char text[16768]; + +enum GenOutput +{ + GO_ShaderCode, + GO_ShaderUid, +}; +// TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? 
+template +void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) +{ + object.Write("struct VS_OUTPUT {\n"); + object.Write(" float4 pos : POSITION;\n"); + object.Write(" float4 colors_0 : COLOR0;\n"); + object.Write(" float4 colors_1 : COLOR1;\n"); + + if (xfregs.numTexGen.numTexGens < 7) + { + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + object.Write(" float3 tex%d : TEXCOORD%d;\n", i, i); + + object.Write(" float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); +/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) +/// object.Write(" float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); + } + else + { + // clip position is in w of first 4 texcoords +/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) +/// { +/// for (int i = 0; i < 8; ++i) +/// object.Write(" float4 tex%d : TEXCOORD%d;\n", i, i); +/// } +/// else + { + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + object.Write(" float%d tex%d : TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, i); + } + } + object.Write("};\n"); +} + +template +void _GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) +{ +#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; + const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; + const char* swizzle = "xyzw"; + if (coloralpha == 1 ) swizzle = "xyz"; + else if (coloralpha == 2 ) swizzle = "w"; + + SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); + SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); + if (!(chan.attnfunc & 1)) { + // atten disabled + switch (chan.diffusefunc) { + case LIGHTDIF_NONE: + object.Write("lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); + object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index, swizzle); + break; + default: _assert_(0); + } + } + else { // spec and spot + + if (chan.attnfunc == 3) + { // spot + object.Write("ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); + object.Write("dist2 = dot(ldir, ldir);\n" + "dist = sqrt(dist2);\n" + "ldir = ldir / dist;\n" + "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); + object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); + } + else if (chan.attnfunc == 1) + { // specular + object.Write("ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); + object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); + object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); + } + + switch (chan.diffusefunc) + { + case LIGHTDIF_NONE: + object.Write("lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", + swizzle, + chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", + lightsName, + index, + swizzle); + break; + default: _assert_(0); + } + } + object.Write("\n"); +} + +// vertex shader +// lights/colors +// materials name is I_MATERIALS in vs and I_PMATERIALS in ps +// inColorName is color in vs and colors_ in ps +// dest is o.colors_ in vs and colors_ in ps +template +void _GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +{ + for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) + { + const LitChannel& color = xfregs.color[j]; + const LitChannel& alpha = xfregs.alpha[j]; + + object.Write("{\n"); + + SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); + if (color.matsource) {// from vertex + if (components & (VB_HAS_COL0 << j)) + object.Write("mat = %s%d;\n", inColorName, j); + else if (components & VB_HAS_COL0) + object.Write("mat = %s0;\n", inColorName); + else + object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + } + else // from color + object.Write("mat = %s.C%d;\n", materialsName, j+2); + + SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); + if (color.enablelighting) { + SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); + if (color.ambsource) { // from vertex + if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); + } + } + } + } + + 
// no shared lights + for (int i = 0; i < 8; ++i) + { + if (!(mask&(1<(object, i, j, lightsName, 1); + if (!(mask&(1<(object, i, j+2, lightsName, 2); + } + } + else if (color.enablelighting || alpha.enablelighting) + { + // lights are disabled on one channel so process only the active ones + const LitChannel& workingchannel = color.enablelighting ? color : alpha; + const int lit_index = color.enablelighting ? j : (j+2); + int coloralpha = color.enablelighting ? 1 : 2; + + SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); + for (int i = 0; i < 8; ++i) + { + if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); + } + } + object.Write("%s%d = mat * saturate(lacc);\n", dest, j); + object.Write("}\n"); + } +} + +// TODO: Problem: this one uses copy constructors or sth for uids when returning... +template +void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) +{ +#undef SetUidField +#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; + + if (type == GO_ShaderCode) + { + out.SetBuffer(text); + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + } + + /// text[sizeof(text) - 1] = 0x7C; // canary + + bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); + u32 lightMask = 0; + if (xfregs.numChan.numColorChans > 0) + lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); + if (xfregs.numChan.numColorChans > 1) + lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); + + out.Write("//Vertex Shader: comp:%x, \n", components); + out.Write("typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_" I_POSNORMALMATRIX";\n" + "typedef struct { float4 t; } FLT4;\n" + "typedef struct { FLT4 T[24]; } s_" I_TEXMATRICES";\n" + "typedef struct { FLT4 T[64]; } s_" I_TRANSFORMMATRICES";\n" + "typedef struct { FLT4 T[32]; } s_" I_NORMALMATRICES";\n" + "typedef struct { FLT4 T[64]; } s_" 
I_POSTTRANSFORMMATRICES";\n" + "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" + "typedef struct { Light lights[8]; } s_" I_LIGHTS";\n" + "typedef struct { float4 C0, C1, C2, C3; } s_" I_MATERIALS";\n" + "typedef struct { float4 T0, T1, T2, T3; } s_" I_PROJECTION";\n" + ); + +/// p = GenerateVSOutputStruct(p, components, api_type); + GenerateVSOutputStruct(out, components, api_type); + + // uniforms + + out.Write("uniform s_" I_TRANSFORMMATRICES" " I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); + out.Write("uniform s_" I_TEXMATRICES" " I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); + out.Write("uniform s_" I_NORMALMATRICES" " I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); + out.Write("uniform s_" I_POSNORMALMATRIX" " I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); + out.Write("uniform s_" I_POSTTRANSFORMMATRICES" " I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); + out.Write("uniform s_" I_LIGHTS" " I_LIGHTS" : register(c%d);\n", C_LIGHTS); + out.Write("uniform s_" I_MATERIALS" " I_MATERIALS" : register(c%d);\n", C_MATERIALS); + out.Write("uniform s_" I_PROJECTION" " I_PROJECTION" : register(c%d);\n", C_PROJECTION); + out.Write("uniform float4 " I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); + + out.Write("VS_OUTPUT main(\n"); + + SetUidField(numTexGens, xfregs.numTexGen.numTexGens); + SetUidField(components, components); + // inputs + if (components & VB_HAS_NRM0) + out.Write(" float3 rawnorm0 : NORMAL0,\n"); + if (components & VB_HAS_NRM1) + { + if (is_d3d) + out.Write(" float3 rawnorm1 : NORMAL1,\n"); + else + out.Write(" float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); + } + if (components & VB_HAS_NRM2) + { + if (is_d3d) + out.Write(" float3 rawnorm2 : NORMAL2,\n"); + else + out.Write(" float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); + } + if (components & VB_HAS_COL0) + out.Write(" float4 color0 : COLOR0,\n"); + if (components & 
VB_HAS_COL1) + out.Write(" float4 color1 : COLOR1,\n"); + for (int i = 0; i < 8; ++i) { + u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); + out.Write("float3 N0 = " I_NORMALMATRICES".T[normidx].t.xyz, N1 = " I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = " I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); + } + + if (components & VB_HAS_NRM0) + out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); + if (components & VB_HAS_NRM1) + out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); + if (components & VB_HAS_NRM2) + out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); + } + else + { + out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX".T0, rawpos), dot(" I_POSNORMALMATRIX".T1, rawpos), dot(" I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); + if (components & VB_HAS_NRM0) + out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); + if (components & VB_HAS_NRM1) + out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); + if (components & VB_HAS_NRM2) + out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); + } + + if (!(components & VB_HAS_NRM0)) + out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); + + + + out.Write("o.pos = float4(dot(" I_PROJECTION".T0, pos), dot(" I_PROJECTION".T1, pos), dot(" I_PROJECTION".T2, pos), dot(" I_PROJECTION".T3, pos));\n"); + + out.Write("float4 mat, lacc;\n" + "float3 ldir, h;\n" + "float dist, dist2, attn;\n"); + + SetUidField(numColorChans, xfregs.numChan.numColorChans); + if(xfregs.numChan.numColorChans == 0) + 
{ + if (components & VB_HAS_COL0) + out.Write("o.colors_0 = color0;\n"); + else + out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + } + + // TODO: This probably isn't necessary if pixel lighting is enabled. + _GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + + if(xfregs.numChan.numColorChans < 2) + { + if (components & VB_HAS_COL1) + out.Write("o.colors_1 = color1;\n"); + else + out.Write("o.colors_1 = o.colors_0;\n"); + } + // special case if only pos and tex coord 0 and tex coord input is AB11 + // donko - this has caused problems in some games. removed for now. + bool texGenSpecialCase = false; + /*bool texGenSpecialCase = + ((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 + (g_VtxDesc.Tex0Coord != NOT_PRESENT) && + (xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); + */ + + // transform texcoords + out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { + TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; + + out.Write("{\n"); + out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); + SetUidField(texMtxInfo[i].sourcerow, xfregs.texMtxInfo[i].sourcerow); + switch (texinfo.sourcerow) { + case XF_SRCGEOM_INROW: + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + out.Write("coord = rawpos;\n"); // pos.w is 1 + break; + case XF_SRCNORMAL_INROW: + if (components & VB_HAS_NRM0) { + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n"); + } + break; + case XF_SRCCOLORS_INROW: + _assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 ); + break; + case XF_SRCBINORMAL_T_INROW: + if (components & VB_HAS_NRM1) { + _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); + out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n"); + } + break; + case XF_SRCBINORMAL_B_INROW: + if (components & VB_HAS_NRM2) { + _assert_( 
texinfo.inputform == XF_TEXINPUT_ABC1 ); + out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n"); + } + break; + default: + _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); + if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) + out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); + break; + } + + // first transformation + SetUidField(texMtxInfo[i].texgentype, xfregs.texMtxInfo[i].texgentype); + switch (texinfo.texgentype) { + case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map + + if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { + // transform the light dir into tangent space + SetUidField(texMtxInfo[i].embosslightshift, xfregs.texMtxInfo[i].embosslightshift); + SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); + out.Write("ldir = normalize(" I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); + out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); + } + else + { + _assert_(0); // should have normals + SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); + out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); + } + + break; + case XF_TEXGEN_COLOR_STRGBC0: + _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); + out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); + break; + case XF_TEXGEN_COLOR_STRGBC1: + _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); + out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); + break; + case XF_TEXGEN_REGULAR: + default: + SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); + if (components & (VB_HAS_TEXMTXIDX0<(object, components, api_type); +} + +void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) +{ + GenerateVertexShader(object, components, 
api_type); +} diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 11bb5c29f6..32d3eff91b 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -21,6 +21,7 @@ #include #include "XFMemory.h" #include "VideoCommon.h" +#include "ShaderGenCommon.h" #define SHADER_POSMTX_ATTRIB 1 #define SHADER_NORM1_ATTRIB 6 @@ -81,74 +82,11 @@ struct uid_data } lit_chans[4]; }; +typedef ShaderUid VertexShaderUid; +typedef ShaderCode VertexShaderCode; -class ShaderUid -{ -public: - ShaderUid() - { - memset(values, 0, sizeof(values)); - } - - void Write(const char* fmt, ...) {} - const char* GetBuffer() { return NULL; } - void SetBuffer(char* buffer) { } - - bool operator == (const ShaderUid& obj) const - { - return memcmp(this->values, obj.values, sizeof(values)) == 0; - } - - // TODO: Store last frame used and order by that? makes much more sense anyway... - bool operator < (const ShaderUid& obj) const - { - for (int i = 0; i < 24; ++i) - { - if (this->values[i] < obj.values[i]) - return true; - else if (this->values[i] > obj.values[i]) - return false; - } - return false; - } - - uid_data& GetUidData() { return data; } - -private: - union - { - uid_data data; - u32 values[24]; // TODO: Length? - }; -}; - -class ShaderCode -{ -public: - ShaderCode() : buf(NULL), write_ptr(NULL) - { - - } - - void Write(const char* fmt, ...) 
- { - va_list arglist; - va_start(arglist, fmt); - write_ptr += vsprintf(write_ptr, fmt, arglist); - va_end(arglist); - } - - const char* GetBuffer() { return buf; } - void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } - uid_data& GetUidData() { return *(uid_data*)NULL; } - -private: - const char* buf; - char* write_ptr; -}; - -void GenerateShaderUid(ShaderUid& object, u32 components, API_TYPE api_type); -void GenerateShaderCode(ShaderCode& object, u32 components, API_TYPE api_type); +void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); +void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); #endif // GCOGL_VERTEXSHADER_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h~ b/Source/Core/VideoCommon/Src/VertexShaderGen.h~ new file mode 100644 index 0000000000..050ed76649 --- /dev/null +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h~ @@ -0,0 +1,92 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef GCOGL_VERTEXSHADER_H +#define GCOGL_VERTEXSHADER_H + +#include +#include "XFMemory.h" +#include "VideoCommon.h" +#include "ShaderGenCommon.h" + +#define SHADER_POSMTX_ATTRIB 1 +#define SHADER_NORM1_ATTRIB 6 +#define SHADER_NORM2_ATTRIB 7 + + +// shader variables +#define I_POSNORMALMATRIX "cpnmtx" +#define I_PROJECTION "cproj" +#define I_MATERIALS "cmtrl" +#define I_LIGHTS "clights" +#define I_TEXMATRICES "ctexmtx" +#define I_TRANSFORMMATRICES "ctrmtx" +#define I_NORMALMATRICES "cnmtx" +#define I_POSTTRANSFORMMATRICES "cpostmtx" +#define I_DEPTHPARAMS "cDepth" // farZ, zRange, scaled viewport width, scaled viewport height + +#define C_POSNORMALMATRIX 0 +#define C_PROJECTION (C_POSNORMALMATRIX + 6) +#define C_MATERIALS (C_PROJECTION + 4) +#define C_LIGHTS (C_MATERIALS + 4) +#define C_TEXMATRICES (C_LIGHTS + 40) +#define C_TRANSFORMMATRICES (C_TEXMATRICES + 24) +#define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64) +#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32) +#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) +#define C_VENVCONST_END (C_DEPTHPARAMS + 4) + +// TODO: Need packing? 
+struct uid_data +{ + u32 components; + u32 numColorChans : 2; + u32 numTexGens : 4; + + struct { + u32 projection : 1; // XF_TEXPROJ_X + u32 inputform : 2; // XF_TEXINPUT_X + u32 texgentype : 3; // XF_TEXGEN_X + u32 sourcerow : 5; // XF_SRCGEOM_X + u32 embosssourceshift : 3; // what generated texcoord to use + u32 embosslightshift : 3; // light index that is used + } texMtxInfo[8]; + struct { + u32 index : 6; // base row of dual transform matrix + u32 normalize : 1; // normalize before send operation + } postMtxInfo[8]; + struct { + u32 enabled : 1; + } dualTexTrans; + struct { + u32 matsource : 1; + u32 enablelighting : 1; + u32 ambsource : 1; + u32 diffusefunc : 2; + u32 attnfunc : 2; + u32 light_mask : 8; + } lit_chans[4]; +}; + +typedef ShaderUid VertexShaderUid; +typedef ShaderCode VertexShaderCode; + +void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); +void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); + + +#endif // GCOGL_VERTEXSHADER_H diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index 7eaf1546da..29dd76fe6d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -42,7 +42,7 @@ GLuint VertexShaderCache::CurrentShader; bool VertexShaderCache::ShaderEnabled; VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry = NULL; -ShaderUid VertexShaderCache::last_uid; +VertexShaderUid VertexShaderCache::last_uid; static int s_nMaxVertexInstructions; @@ -75,8 +75,8 @@ void VertexShaderCache::Shutdown() VERTEXSHADER* VertexShaderCache::SetShader(u32 components) { // Possible optimization: Don't always generate the shader uid, but keep track of changes in BPStructs instead - ShaderUid uid; - GenerateShaderUid(uid, components, API_OPENGL); + VertexShaderUid uid; + GetVertexShaderUid(uid, components, API_OPENGL); if (last_entry) 
{ if (uid == last_uid) @@ -101,8 +101,8 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components) // Make an entry in the table VSCacheEntry& entry = vshaders[uid]; last_entry = &entry; - ShaderCode code; - GenerateShaderCode(code, components, API_OPENGL); + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_OPENGL); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp~ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp~ new file mode 100644 index 0000000000..f42cf09581 --- /dev/null +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp~ @@ -0,0 +1,271 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include + +#include "Globals.h" +#include "VideoConfig.h" +#include "Statistics.h" + +#include "GLUtil.h" + +#include "Render.h" +#include "VertexShaderGen.h" +#include "VertexShaderManager.h" +#include "VertexShaderCache.h" +#include "VertexManager.h" +#include "VertexLoader.h" +#include "XFMemory.h" +#include "ImageWrite.h" +#include "FileUtil.h" +#include "Debugger.h" + +namespace OGL +{ + +VertexShaderCache::VSCache VertexShaderCache::vshaders; +GLuint VertexShaderCache::CurrentShader; +bool VertexShaderCache::ShaderEnabled; + +VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry = NULL; +VertexShaderUid VertexShaderCache::last_uid; + +static int s_nMaxVertexInstructions; + + +void VertexShaderCache::Init() +{ + glEnable(GL_VERTEX_PROGRAM_ARB); + ShaderEnabled = true; + CurrentShader = 0; + last_entry = NULL; + + glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions); + if (strstr((const char*)glGetString(GL_VENDOR), "Humper") != NULL) s_nMaxVertexInstructions = 4096; +#if CG_VERSION_NUM == 2100 + if (strstr((const char*)glGetString(GL_VENDOR), "ATI") != NULL) + { + s_nMaxVertexInstructions = 4096; + } +#endif +} + +void VertexShaderCache::Shutdown() +{ + for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) + iter->second.Destroy(); + vshaders.clear(); +} + + +VERTEXSHADER* VertexShaderCache::SetShader(u32 components) +{ + // Possible optimization: Don't always generate the shader uid, but keep track of changes in BPStructs instead + VertexShaderUid uid; + GetVertexShaderUid(uid, components, API_OPENGL); + if (last_entry) + { + if (uid == last_uid) + { + GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); + return &last_entry->shader; + } + } + + last_uid = uid; + + VSCache::iterator iter = 
vshaders.find(uid); + if (iter != vshaders.end()) + { + VSCacheEntry &entry = iter->second; + last_entry = &entry; + + GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); + return &last_entry->shader; + } + + // Make an entry in the table + VSCacheEntry& entry = vshaders[uid]; + last_entry = &entry; + ShaderCode code; + GenerateShaderCode(code, components, API_OPENGL); + +#if defined(_DEBUG) || defined(DEBUGFAST) + if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { + static int counter = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); + + SaveData(szTemp, code.GetBuffer()); + } +#endif + + if (!code.GetBuffer() || !VertexShaderCache::CompileVertexShader(entry.shader, code.GetBuffer())) { + GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); + return NULL; + } + + INCSTAT(stats.numVertexShadersCreated); + SETSTAT(stats.numVertexShadersAlive, vshaders.size()); + GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); + return &last_entry->shader; +} + +bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram) +{ + // Reset GL error before compiling shaders. Yeah, we need to investigate the causes of these. 
+ GLenum err = GL_REPORT_ERROR(); + if (err != GL_NO_ERROR) + { + ERROR_LOG(VIDEO, "glError %08x before VS!", err); + } + +#if defined HAVE_CG && HAVE_CG + char stropt[64]; + sprintf(stropt, "MaxLocalParams=256,MaxInstructions=%d", s_nMaxVertexInstructions); + const char *opts[] = {"-profileopts", stropt, "-O2", "-q", NULL}; + CGprogram tempprog = cgCreateProgram(g_cgcontext, CG_SOURCE, pstrprogram, g_cgvProf, "main", opts); + if (!cgIsProgram(tempprog)) { + static int num_failures = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%sbad_vs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); + std::ofstream file(szTemp); + file << pstrprogram; + file.close(); + + PanicAlert("Failed to compile vertex shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%d):\n%s", + szTemp, + g_cgfProf, + cgGetLastListing(g_cgcontext)); + + cgDestroyProgram(tempprog); + ERROR_LOG(VIDEO, "Failed to load vs %s:", cgGetLastListing(g_cgcontext)); + ERROR_LOG(VIDEO, "%s", pstrprogram); + return false; + } + + if (cgGetError() != CG_NO_ERROR) + { + WARN_LOG(VIDEO, "Failed to load vs %s:", cgGetLastListing(g_cgcontext)); + WARN_LOG(VIDEO, "%s", pstrprogram); + } + + // This looks evil - we modify the program through the const char * we got from cgGetProgramString! + // It SHOULD not have any nasty side effects though - but you never know... 
+ char *pcompiledprog = (char*)cgGetProgramString(tempprog, CG_COMPILED_PROGRAM); + char *plocal = strstr(pcompiledprog, "program.local"); + while (plocal != NULL) { + const char* penv = " program.env"; + memcpy(plocal, penv, 13); + plocal = strstr(plocal + 13, "program.local"); + } + glGenProgramsARB(1, &vs.glprogid); + SetCurrentShader(vs.glprogid); + + glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); + err = GL_REPORT_ERROR(); + if (err != GL_NO_ERROR) { + ERROR_LOG(VIDEO, "%s", pstrprogram); + ERROR_LOG(VIDEO, "%s", pcompiledprog); + } + + cgDestroyProgram(tempprog); +#endif + + if (g_ActiveConfig.bEnableShaderDebugging) + vs.strprog = pstrprogram; + + return true; +} + +void VertexShaderCache::DisableShader() +{ + if (ShaderEnabled) + { + glDisable(GL_VERTEX_PROGRAM_ARB); + ShaderEnabled = false; + } +} + + +void VertexShaderCache::SetCurrentShader(GLuint Shader) +{ + if (!ShaderEnabled) + { + glEnable(GL_VERTEX_PROGRAM_ARB); + ShaderEnabled= true; + } + if (CurrentShader != Shader) + { + if(Shader != 0) + CurrentShader = Shader; + glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); + } +} + +void Renderer::SetVSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) +{ + glProgramEnvParameter4fARB(GL_VERTEX_PROGRAM_ARB, const_number, f1, f2, f3, f4); +} + +void Renderer::SetVSConstant4fv(unsigned int const_number, const float *f) +{ + glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number, f); +} + +void Renderer::SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f) +{ + if(GLEW_EXT_gpu_program_parameters) + { + glProgramEnvParameters4fvEXT(GL_VERTEX_PROGRAM_ARB, const_number, count, f); + } + else + { + for (unsigned int i = 0; i < count; i++,f+=4) + glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number + i, f); + } +} + +void Renderer::SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const 
float *f) +{ + if(GLEW_EXT_gpu_program_parameters) + { + float buf[4 * C_VENVCONST_END]; + for (unsigned int i = 0; i < count; i++) + { + buf[4*i ] = *f++; + buf[4*i+1] = *f++; + buf[4*i+2] = *f++; + buf[4*i+3] = 0.f; + } + glProgramEnvParameters4fvEXT(GL_VERTEX_PROGRAM_ARB, const_number, count, buf); + } + else + { + for (unsigned int i = 0; i < count; i++) + { + float buf[4]; + buf[0] = *f++; + buf[1] = *f++; + buf[2] = *f++; + buf[3] = 0.f; + glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number + i, buf); + } + } +} + +} // namespace OGL diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h index f3831b3f7b..019abf6b6f 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h @@ -40,7 +40,7 @@ class VertexShaderCache struct VSCacheEntry { VERTEXSHADER shader; - ShaderUid safe_uid; + VertexShaderUid safe_uid; VSCacheEntry() {} void Destroy() { // printf("Destroying vs %i\n", shader.glprogid); @@ -49,12 +49,12 @@ class VertexShaderCache } }; - typedef std::map VSCache; + typedef std::map VSCache; static VSCache vshaders; static VSCacheEntry* last_entry; - static ShaderUid last_uid; // TODO: Use reference instead.. + static VertexShaderUid last_uid; // TODO: Use reference instead.. static GLuint CurrentShader; static bool ShaderEnabled; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h~ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h~ new file mode 100644 index 0000000000..859645c59a --- /dev/null +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h~ @@ -0,0 +1,76 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _VERTEXSHADERCACHE_H_ +#define _VERTEXSHADERCACHE_H_ + +#include +#include + +#include "BPMemory.h" +#include "VertexShaderGen.h" + +namespace OGL +{ + +struct VERTEXSHADER +{ + VERTEXSHADER() : glprogid(0) {} + GLuint glprogid; // opengl program id + + std::string strprog; +}; + +class VertexShaderCache +{ + struct VSCacheEntry + { + VERTEXSHADER shader; + VertexShaderUid safe_uid; + VSCacheEntry() {} + void Destroy() { + // printf("Destroying vs %i\n", shader.glprogid); + glDeleteProgramsARB(1, &shader.glprogid); + shader.glprogid = 0; + } + }; + + typedef std::map VSCache; + + static VSCache vshaders; + + static VSCacheEntry* last_entry; + static ShaderUid last_uid; // TODO: Use reference instead.. + + static GLuint CurrentShader; + static bool ShaderEnabled; + +public: + static void Init(); + static void Shutdown(); + + static VERTEXSHADER* SetShader(u32 components); + static bool CompileVertexShader(VERTEXSHADER& ps, const char* pstrprogram); + + static void SetCurrentShader(GLuint Shader); + static void DisableShader(); + +}; + +} // namespace OGL + +#endif // _VERTEXSHADERCACHE_H_ From dc0f470215b4b8268c3d60cfdfc1aa07cec99277 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 7 Aug 2012 01:02:04 +0200 Subject: [PATCH 03/54] Added new shader cache uids for pixel shader gen. 
--- Source/Core/VideoCommon/Src/Debugger.cpp | 8 +- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 670 +++++++----------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 112 ++- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 8 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 5 - .../Core/VideoCommon/Src/VertexShaderGen.cpp~ | 645 ----------------- Source/Core/VideoCommon/Src/VertexShaderGen.h | 6 +- .../Core/VideoCommon/Src/VertexShaderGen.h~ | 2 +- Source/Core/VideoCommon/Src/VideoConfig.h | 2 +- .../Plugin_VideoOGL/Src/PixelShaderCache.cpp | 24 +- .../Plugin_VideoOGL/Src/PixelShaderCache.h | 7 +- 11 files changed, 318 insertions(+), 1171 deletions(-) delete mode 100644 Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ diff --git a/Source/Core/VideoCommon/Src/Debugger.cpp b/Source/Core/VideoCommon/Src/Debugger.cpp index 005f81c8e6..e0024db655 100644 --- a/Source/Core/VideoCommon/Src/Debugger.cpp +++ b/Source/Core/VideoCommon/Src/Debugger.cpp @@ -103,21 +103,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path) if (!useDstAlpha) { output = "Destination alpha disabled:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } else { if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend) { output = "Using dual source blending for destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } else { output = "Using two passes for emulating destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_NONE, 
g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); output += "\n\nDestination alpha pass shader:\n"; - output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); +/// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); } } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 448501aad0..5ee95848b5 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -28,241 +28,6 @@ #include "NativeVertexFormat.h" -static void StageHash(u32 stage, u32* out) -{ - out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24 - u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now - out[0] |= (alphaC&0xF0) << 24; // 8 - out[1] |= alphaC >> 8; // 16 - - // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3 - out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1 - // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan - - bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages; - out[2] |= bHasIndStage << 2; // 1 - - bool needstexcoord = false; - - if (bHasIndStage) - { - out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation - needstexcoord = true; - } - - - TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC; - TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC; - - if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC - || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC - || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC - || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC - || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA - || ac.c 
== TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) - { - out[0] |= bpmem.combiners[stage].alphaC.rswap; - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2 - out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2 - out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23; - out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1; - } - - out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1); - if (bpmem.tevorders[stage/2].getEnable(stage&1)) - { - if (bHasIndStage) needstexcoord = true; - - out[0] |= bpmem.combiners[stage].alphaC.tswap; - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2 - out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2 - out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16; - } - - if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST - || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) - { - out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5 - out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5 - } - - if (needstexcoord) - { - out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16; - } -} - -// Mash together all the inputs that contribute to the code of a generated pixel shader into -// a unique identifier, basically containing all the bits. Yup, it's a lot .... -// It would likely be a lot more efficient to build this incrementally as the attributes -// are set... 
-void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - memset(uid->values, 0, sizeof(uid->values)); - uid->values[0] |= bpmem.genMode.numtevstages; // 4 - uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 - uid->values[0] |= dstAlphaMode << 8; // 2 - - bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - uid->values[0] |= enablePL << 10; // 1 - - if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 11; // 4 - - AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult(); - uid->values[0] |= alphaPreTest << 15; // 2 - - // numtexgens should be <= 8 - for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) - uid->values[0] |= xfregs.texMtxInfo[i].projection << (17+i); // 1 - - uid->values[1] = bpmem.genMode.numindstages; // 3 - u32 indirectStagesUsed = 0; - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - indirectStagesUsed |= (1 << bpmem.tevind[i].bt); - - assert(indirectStagesUsed == (indirectStagesUsed & 0xF)); - - uid->values[1] |= indirectStagesUsed << 3; // 4; - - for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) - { - if (indirectStagesUsed & (1 << i)) - { - uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (7 + 3*i); // 1 - if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) - uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (8 + 3*i); // 2 - } - } - - u32* ptr = &uid->values[2]; - for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i) - { - StageHash(i, ptr); - ptr += 4; // max: ptr = &uid->values[66] - } - - ptr[0] |= bpmem.alpha_test.comp0; // 3 - ptr[0] |= bpmem.alpha_test.comp1 << 3; // 3 - ptr[0] |= bpmem.alpha_test.logic << 6; // 2 - - ptr[0] |= bpmem.ztex2.op << 8; // 2 - ptr[0] |= bpmem.zcontrol.early_ztest << 10; // 1 - ptr[0] |= bpmem.zmode.testenable << 11; // 1 - 
ptr[0] |= bpmem.zmode.updateenable << 12; // 1 - - if (dstAlphaMode != DSTALPHA_ALPHA_PASS) - { - ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 13; // 3 - if (bpmem.fog.c_proj_fsel.fsel != 0) - { - ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1 - ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1 - } - } - - ++ptr; - if (enablePL) - { - ptr += GetLightingShaderId(ptr); - *ptr++ = components; - } - - uid->num_values = ptr - uid->values; -} - -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - memset(uid->values, 0, sizeof(uid->values)); - u32* ptr = uid->values; - *ptr++ = dstAlphaMode; // 0 - *ptr++ = bpmem.genMode.hex; // 1 - *ptr++ = bpmem.ztex2.hex; // 2 - *ptr++ = bpmem.zcontrol.hex; // 3 - *ptr++ = bpmem.zmode.hex; // 4 - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 5 - *ptr++ = xfregs.numTexGen.hex; // 6 - - if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - *ptr++ = xfregs.color[0].hex; - *ptr++ = xfregs.alpha[0].hex; - *ptr++ = xfregs.color[1].hex; - *ptr++ = xfregs.alpha[1].hex; - *ptr++ = components; - } - - for (unsigned int i = 0; i < 8; ++i) - *ptr++ = xfregs.texMtxInfo[i].hex; // 7-14 - - for (unsigned int i = 0; i < 16; ++i) - *ptr++ = bpmem.tevind[i].hex; // 15-30 - - *ptr++ = bpmem.tevindref.hex; // 31 - - for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times - { - *ptr++ = bpmem.combiners[i].colorC.hex; // 32+5*i - *ptr++ = bpmem.combiners[i].alphaC.hex; // 33+5*i - *ptr++ = bpmem.tevind[i].hex; // 34+5*i - *ptr++ = bpmem.tevksel[i/2].hex; // 35+5*i - *ptr++ = bpmem.tevorders[i/2].hex; // 36+5*i - } - - ptr = &uid->values[112]; - - *ptr++ = bpmem.alpha_test.hex; // 112 - - *ptr++ = bpmem.fog.c_proj_fsel.hex; // 113 - *ptr++ = bpmem.fogRange.Base.hex; // 114 - - _assert_((ptr - uid->values) == uid->GetNumValues()); -} - -void ValidatePixelShaderIDs(API_TYPE api, 
PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components) -{ - if (!g_ActiveConfig.bEnableShaderDebugging) - return; - - PIXELSHADERUIDSAFE new_id; - GetSafePixelShaderId(&new_id, dstAlphaMode, components); - - if (!(old_id == new_id)) - { - std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components)); - if (old_code != new_code) - { - _assert_(old_id.GetNumValues() == new_id.GetNumValues()); - - char msg[8192]; - char* ptr = msg; - ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n"); - const int N = new_id.GetNumValues(); - for (int i = 0; i < N/2; ++i) - ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1], - new_id.values[2*i], new_id.values[2*i+1]); - if (N % 2) - ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]); - - static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file(szTemp); - file << msg; - file << "\n\nOld shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code; - file.close(); - - PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp); - } - } -} - // old tev->pixelshader notes // // color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 @@ -272,11 +37,11 @@ void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std:: // output is given by .outreg // tevtemp is set according to swapmodetables and -static void WriteStage(char *&p, int n, API_TYPE ApiType); -static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +template static void WriteStage(char *&p, int n, API_TYPE ApiType); +template static void SampleTexture(T& out, const char *destination, 
const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); // static void WriteAlphaCompare(char *&p, int num, int comp); -static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); -static void WriteFog(char *&p); +template static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); +template static void WriteFog(char *&p); static const char *tevKSelTableC[] = // KCSEL { @@ -456,8 +221,6 @@ static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" }; static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" }; -#define WRITE p+=sprintf - static char swapModeTable[4][5]; static char text[16384]; @@ -484,105 +247,109 @@ static void BuildSwapModeTable() } } -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +template +void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - text[sizeof(text) - 1] = 0x7C; // canary +#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; + if (type == GO_ShaderCode) + { + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + out.SetBuffer(text); + } +/// text[sizeof(text) - 1] = 0x7C; // canary + /// TODO: Uids! 
BuildSwapModeTable(); // Needed for WriteStage - int numStages = bpmem.genMode.numtevstages + 1; - int numTexgen = bpmem.genMode.numtexgens; + unsigned int numStages = bpmem.genMode.numtevstages + 1; + unsigned int numTexgen = bpmem.genMode.numtexgens; char *p = text; - WRITE(p, "//Pixel Shader for TEV stages\n"); - WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n", + out.Write("//Pixel Shader for TEV stages\n"); + out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", numStages, numTexgen/*, bpmem.genMode.numindstages*/); - int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) - { - for (int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - // Declare samplers + SetUidField(components, components); + SetUidField(dstAlphaMode, dstAlphaMode); + SetUidField(genMode.numindstages, bpmem.genMode.numindstages); + SetUidField(genMode.numtevstages, bpmem.genMode.numtevstages); + SetUidField(genMode.numtexgens, bpmem.genMode.numtexgens); + + // Declare samplers if(ApiType != API_D3D11) { - WRITE(p, "uniform sampler2D "); + out.Write("uniform sampler2D "); } else { - WRITE(p, "sampler "); + out.Write("sampler "); } bool bfirst = true; for (int i = 0; i < 8; ++i) { - WRITE(p, "%s samp%d : register(s%d)", bfirst?"":",", i, i); + out.Write("%s samp%d : register(s%d)", bfirst?"":",", i, i); bfirst = false; } - WRITE(p, ";\n"); + out.Write(";\n"); if(ApiType == API_D3D11) { - WRITE(p, "Texture2D "); + out.Write("Texture2D "); bfirst = true; for (int i = 0; i < 8; ++i) { - WRITE(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i); + out.Write("%s Tex%d : register(t%d)", bfirst?"":",", i, i); bfirst = false; } - WRITE(p, ";\n"); + out.Write(";\n"); } - WRITE(p, "\n"); + out.Write("\n"); - WRITE(p, "uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); - WRITE(p, "uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); 
- WRITE(p, "uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); - WRITE(p, "uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); - WRITE(p, "uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); - WRITE(p, "uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); - WRITE(p, "uniform float4 " I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); - WRITE(p, "uniform float4 " I_FOG"[3] : register(c%d);\n", C_FOG); + out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); + out.Write("uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); + out.Write("uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); + out.Write("uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); + out.Write("uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); + out.Write("uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); + out.Write("uniform float4 " I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); + out.Write("uniform float4 " I_FOG"[3] : register(c%d);\n", C_FOG); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); - WRITE(p,"typedef struct { Light lights[8]; } s_" I_PLIGHTS";\n"); - WRITE(p, "uniform s_" I_PLIGHTS" " I_PLIGHTS" : register(c%d);\n", C_PLIGHTS); - WRITE(p, "typedef struct { float4 C0, C1, C2, C3; } s_" I_PMATERIALS";\n"); - WRITE(p, "uniform s_" I_PMATERIALS" " I_PMATERIALS" : register(c%d);\n", C_PMATERIALS); + out.Write("typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); + out.Write("typedef struct { Light lights[8]; } s_" I_PLIGHTS";\n"); + out.Write("uniform s_" I_PLIGHTS" " I_PLIGHTS" : register(c%d);\n", C_PLIGHTS); + out.Write("typedef struct { float4 C0, C1, C2, C3; } s_" I_PMATERIALS";\n"); + out.Write("uniform s_" I_PMATERIALS" " I_PMATERIALS" : 
register(c%d);\n", C_PMATERIALS); } - WRITE(p, "void main(\n"); + out.Write("void main(\n"); if(ApiType != API_D3D11) { - WRITE(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", + out.Write(" out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "", "\n out float depth : DEPTH,", ApiType & API_OPENGL ? "WPOS" : ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS"); } else { - WRITE(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", + out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", "\n out float depth : SV_Depth,"); } - WRITE(p, " in float4 colors_0 : COLOR0,\n"); - WRITE(p, " in float4 colors_1 : COLOR1"); + out.Write(" in float4 colors_0 : COLOR0,\n"); + out.Write(" in float4 colors_1 : COLOR1"); // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { - for (int i = 0; i < numTexgen; ++i) - WRITE(p, ",\n in float3 uv%d : TEXCOORD%d", i, i); - WRITE(p, ",\n in float4 clipPos : TEXCOORD%d", numTexgen); + for (unsigned int i = 0; i < numTexgen; ++i) + out.Write(",\n in float3 uv%d : TEXCOORD%d", i, i); + out.Write(",\n in float4 clipPos : TEXCOORD%d", numTexgen); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, ",\n in float4 Normal : TEXCOORD%d", numTexgen + 1); + out.Write(",\n in float4 Normal : TEXCOORD%d", numTexgen + 1); } else { @@ -590,17 +357,18 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { for (int i = 0; i < 8; ++i) - WRITE(p, ",\n in float4 uv%d : TEXCOORD%d", i, i); + out.Write(",\n in float4 uv%d : TEXCOORD%d", i, i); } else { + /// TODO: Set numTexGen used for (unsigned 
int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); + out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); } } - WRITE(p, " ) {\n"); + out.Write(" ) {\n"); - WRITE(p, " float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" + out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n" " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n" " float4 alphabump=float4(0.0f,0.0f,0.0f,0.0f);\n" " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n" @@ -613,63 +381,96 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType { if (xfregs.numTexGen.numTexGens < 7) { - WRITE(p,"float3 _norm0 = normalize(Normal.xyz);\n\n"); - WRITE(p,"float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); + out.Write("float3 _norm0 = normalize(Normal.xyz);\n\n"); + out.Write("float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); } else { - WRITE(p," float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); - WRITE(p,"float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); + out.Write(" float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); + out.Write("float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); } - WRITE(p, "float4 mat, lacc;\n" + out.Write("float4 mat, lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); - p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); +/// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } if (numTexgen < 7) - WRITE(p, "clipPos = float4(rawpos.x, rawpos.y, clipPos.z, 
clipPos.w);\n"); + out.Write("clipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); else - WRITE(p, "float4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); + out.Write("float4 clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { - WRITE(p, "float3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); + out.Write("float3 uv0 = float3(0.0f, 0.0f, 0.0f);\n"); } else { - for (int i = 0; i < numTexgen; ++i) + for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides + SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { - WRITE(p, "if (uv%d.z)", i); - WRITE(p, " uv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); + out.Write("if (uv%d.z)", i); + out.Write(" uv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } - WRITE(p, "uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); + out.Write("uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); } } // indirect texture map lookup + int nIndirectStagesUsed = 0; + if (bpmem.genMode.numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + /// Ignoring this for now, handled in WriteStage. 
+ if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + SetUidField(nIndirectStagesUsed, nIndirectStagesUsed); for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { if (nIndirectStagesUsed & (1<(out, buffer, "tempcoord", "abg", texmap, ApiType); } } @@ -683,8 +484,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType RegisterStates[i].AuxStored = false; } - for (int i = 0; i < numStages; i++) - WriteStage(p, i, ApiType); //build the equation for this stage + for (unsigned int i = 0; i < numStages; i++) + WriteStage(out, i, ApiType); //build the equation for this stage if(numStages) { @@ -692,48 +493,49 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType // regardless of the used destination register if(bpmem.combiners[numStages - 1].colorC.dest != 0) { +/// SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; - WRITE(p, "prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); + out.Write("prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; } if(bpmem.combiners[numStages - 1].alphaC.dest != 0) { bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored; - WRITE(p, "prev.a = %s%s;\n", retrieveFromAuxRegister ? 
"c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); + out.Write("prev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl; } } // emulation of unsigned 8 overflow when casting if needed if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) - WRITE(p, "prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); if (Pretest == AlphaTest::UNDETERMINED) - WriteAlphaTest(p, ApiType, dstAlphaMode); + WriteAlphaTest(out, ApiType, dstAlphaMode); // the screen space depth value = far z + (clip z / clip w) * z range - WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); + out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); // Note: depth textures are disabled if early depth test is enabled if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... - WRITE(p, "zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", + out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? 
"+ zCoord" : ""); // scale to make result from frac correct - WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n"); - WRITE(p, "zCoord = frac(zCoord);\n"); - WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n"); + out.Write("zCoord = zCoord * (16777215.0f/16777216.0f);\n"); + out.Write("zCoord = frac(zCoord);\n"); + out.Write("zCoord = zCoord * (16777216.0f/16777215.0f);\n"); } - WRITE(p, "depth = zCoord;\n"); + out.Write("depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) - WRITE(p, " ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); + out.Write(" ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); else { - WriteFog(p); - WRITE(p, " ocol0 = prev;\n"); + WriteFog(out); + out.Write(" ocol0 = prev;\n"); } // On D3D11, use dual-source color blending to perform dst alpha in a @@ -741,17 +543,16 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) { // Colors will be blended against the alpha from ocol1... - WRITE(p, " ocol1 = ocol0;\n"); + out.Write(" ocol1 = ocol0;\n"); // ...and the alpha from ocol0 will be written to the framebuffer. 
- WRITE(p, " ocol0.a = " I_ALPHA"[0].a;\n"); + out.Write(" ocol0.a = " I_ALPHA"[0].a;\n"); } - WRITE(p, "}\n"); - if (text[sizeof(text) - 1] != 0x7C) - PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); + out.Write("}\n"); +/// if (text[sizeof(text) - 1] != 0x7C) +/// PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); setlocale(LC_NUMERIC, ""); // restore locale - return text; } @@ -800,7 +601,8 @@ static const char *TEVCMPAlphaOPTable[16] = }; -static void WriteStage(char *&p, int n, API_TYPE ApiType) +template +static void WriteStage(T& out, int n, API_TYPE ApiType) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -810,25 +612,25 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) if (!bHasTexCoord) texcoord = 0; - WRITE(p, "// TEV stage %d\n", n); + out.Write("// TEV stage %d\n", n); if (bHasIndStage) { - WRITE(p, "// indirect op\n"); + out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { - WRITE(p, "alphabump = indtex%d.%s %s;\n", + out.Write("alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); } // format - WRITE(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); + out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias if (bpmem.tevind[n].bias != ITB_NONE ) - WRITE(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale if (bpmem.tevind[n].mid != 0) @@ -836,26 +638,26 @@ static void WriteStage(char *&p, int n, 
API_TYPE ApiType) if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1); - WRITE(p, "float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", + out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", n, mtxidx, n, mtxidx+1, n); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); - WRITE(p, "float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); - WRITE(p, "float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); } else - WRITE(p, "float2 indtevtrans%d = 0;\n", n); + out.Write("float2 indtevtrans%d = 0;\n", n); } else - WRITE(p, "float2 indtevtrans%d = 0;\n", n); + out.Write("float2 indtevtrans%d = 0;\n", n); // --------- // Wrapping @@ -863,24 +665,24 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - WRITE(p, "wrappedcoord.x = uv%d.x;\n", texcoord); + out.Write("wrappedcoord.x = uv%d.x;\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) - WRITE(p, "wrappedcoord.x = 0.0f;\n"); + out.Write("wrappedcoord.x = 0.0f;\n"); else - WRITE(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - WRITE(p, "wrappedcoord.y = uv%d.y;\n", 
texcoord); + out.Write("wrappedcoord.y = uv%d.y;\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) - WRITE(p, "wrappedcoord.y = 0.0f;\n"); + out.Write("wrappedcoord.y = 0.0f;\n"); else - WRITE(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord - WRITE(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); + out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); else - WRITE(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); + out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); } TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; @@ -894,8 +696,8 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); - WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + out.Write("crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); } @@ -905,17 +707,17 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { // calc tevcord if(bHasTexCoord) - WRITE(p, "tevcoord.xy = uv%d.xy;\n", texcoord); + out.Write("tevcoord.xy = uv%d.xy;\n", texcoord); else - WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n"); + out.Write("tevcoord.xy = float2(0.0f, 0.0f);\n"); } char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); - SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType); + SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); } else - WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + 
out.Write("textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST @@ -923,14 +725,14 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); - WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if(kc > 7 || ka > 7) { - WRITE(p, "ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); } else { - WRITE(p, "ckonsttemp = konsttemp;\n"); + out.Write("ckonsttemp = konsttemp;\n"); } } @@ -941,13 +743,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) { - WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[0].AlphaNeedOverflowControl = false; RegisterStates[0].ColorNeedOverflowControl = false; } else { - WRITE(p, "cprev = prev;\n"); + out.Write("cprev = prev;\n"); } RegisterStates[0].AuxStored = true; } @@ -959,13 +761,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) { - WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[1].AlphaNeedOverflowControl = false; RegisterStates[1].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc0 = c0;\n"); + out.Write("cc0 = c0;\n"); } RegisterStates[1].AuxStored = true; } @@ -977,13 +779,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[2].AlphaNeedOverflowControl || 
RegisterStates[2].ColorNeedOverflowControl) { - WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[2].AlphaNeedOverflowControl = false; RegisterStates[2].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc1 = c1;\n"); + out.Write("cc1 = c1;\n"); } RegisterStates[2].AuxStored = true; } @@ -995,13 +797,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) { if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) { - WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); + out.Write("cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); RegisterStates[3].AlphaNeedOverflowControl = false; RegisterStates[3].ColorNeedOverflowControl = false; } else { - WRITE(p, "cc2 = c2;\n"); + out.Write("cc2 = c2;\n"); } RegisterStates[3].AuxStored = true; } @@ -1009,117 +811,116 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); RegisterStates[cc.dest].AuxStored = false; - // combine the color channel - WRITE(p, "// color combine\n"); + out.Write("// color combine\n"); if (cc.clamp) - WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]); + out.Write("%s = saturate(", tevCOutputTable[cc.dest]); else - WRITE(p, "%s = ", tevCOutputTable[cc.dest]); + out.Write("%s = ", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare { //normal color combiner goes here if (cc.shift > TEVSCALE_1) - WRITE(p, "%s*(", tevScaleTable[cc.shift]); + out.Write("%s*(", tevScaleTable[cc.shift]); if(!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - WRITE(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); + out.Write("%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); if (cc.a == cc.b) - WRITE(p, "%s", tevCInputTable[cc.a + 16]); + out.Write("%s", tevCInputTable[cc.a + 16]); else if (cc.c == TEVCOLORARG_ZERO) - 
WRITE(p, "%s", tevCInputTable[cc.a + 16]); + out.Write("%s", tevCInputTable[cc.a + 16]); else if (cc.c == TEVCOLORARG_ONE) - WRITE(p, "%s", tevCInputTable[cc.b + 16]); + out.Write("%s", tevCInputTable[cc.b + 16]); else if (cc.a == TEVCOLORARG_ZERO) - WRITE(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); else if (cc.b == TEVCOLORARG_ZERO) - WRITE(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); + out.Write("%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); else - WRITE(p, "lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); - WRITE(p, "%s", tevBiasTable[cc.bias]); + out.Write("%s", tevBiasTable[cc.bias]); if (cc.shift > TEVSCALE_1) - WRITE(p, ")"); + out.Write(")"); } else { int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here - WRITE(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table + out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table tevCInputTable[cc.d], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); } if (cc.clamp) - WRITE(p, ")"); - WRITE(p,";\n"); + out.Write(")"); + out.Write(";\n"); RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); RegisterStates[ac.dest].AuxStored = false; - // combine the alpha channel - WRITE(p, "// alpha combine\n"); + out.Write("// alpha combine\n"); if (ac.clamp) - WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]); + out.Write("%s = saturate(", tevAOutputTable[ac.dest]); else - WRITE(p, "%s = ", tevAOutputTable[ac.dest]); + out.Write("%s = ", tevAOutputTable[ac.dest]); if (ac.bias != TevBias_COMPARE) // if not compare { //normal alpha combiner goes here if (ac.shift > TEVSCALE_1) - WRITE(p, "%s*(", 
tevScaleTable[ac.shift]); + out.Write("%s*(", tevScaleTable[ac.shift]); if(!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - WRITE(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); + out.Write("%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); if (ac.a == ac.b) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + out.Write("%s.a", tevAInputTable[ac.a + 8]); else if (ac.c == TEVALPHAARG_ZERO) - WRITE(p, "%s.a", tevAInputTable[ac.a + 8]); + out.Write("%s.a", tevAInputTable[ac.a + 8]); else if (ac.a == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); else if (ac.b == TEVALPHAARG_ZERO) - WRITE(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); + out.Write("%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); else - WRITE(p, "lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); - WRITE(p, "%s",tevBiasTable[ac.bias]); + out.Write("%s",tevBiasTable[ac.bias]); if (ac.shift>0) - WRITE(p, ")"); + out.Write(")"); } else { //compare alpha combiner goes here int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here - WRITE(p, TEVCMPAlphaOPTable[cmp], + out.Write(TEVCMPAlphaOPTable[cmp], tevAInputTable[ac.d], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); } if (ac.clamp) - WRITE(p, ")"); - WRITE(p, ";\n\n"); - WRITE(p, "// TEV done\n"); + out.Write(")"); + out.Write(";\n\n"); + out.Write("// TEV done\n"); } -void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) +template +void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { if (ApiType == API_D3D11) - WRITE(p, 
"%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); + out.Write("%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); else - WRITE(p, "%s=tex2D(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); + out.Write("%s=tex2D(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap, texcoords, texmap, texswap); } static const char *tevAlphaFuncsTable[] = { - "(false)", //ALPHACMP_NEVER 0 + "(false)", //ALPHACMP_NEVER 0, TODO: Not safe? "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 @@ -1137,7 +938,8 @@ static const char *tevAlphaFunclogicTable[] = " == " // xnor }; -static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode) +template +static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode) { static const char *alphaRef[2] = { @@ -1146,21 +948,21 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode }; // using discard then return works the same in cg and dx9 but not in dx11 - WRITE(p, "if(!( "); + out.Write("if(!( "); int compindex = bpmem.alpha_test.comp0; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table + out.Write(tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table - WRITE(p, "%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op + out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op compindex = bpmem.alpha_test.comp1; - WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table - WRITE(p, ")) {\n"); + out.Write(tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component 
from the alpha function table + out.Write(")) {\n"); - WRITE(p, "ocol0 = 0;\n"); + out.Write("ocol0 = 0;\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) - WRITE(p, "ocol1 = 0;\n"); - WRITE(p, "depth = 1.f;\n"); + out.Write("ocol1 = 0;\n"); + out.Write("depth = 1.f;\n"); // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before // or after texturing and alpha test. PC GPUs have no way to support this @@ -1174,12 +976,12 @@ static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode // we don't have a choice. if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) { - WRITE(p, "discard;\n"); + out.Write("discard;\n"); if (ApiType != API_D3D11) - WRITE(p, "return;\n"); + out.Write("return;\n"); } - WRITE(p, "}\n"); + out.Write("}\n"); } static const char *tevFogFuncsTable[] = @@ -1194,38 +996,40 @@ static const char *tevFogFuncsTable[] = " fog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" //backward exp2 }; -static void WriteFog(char *&p) +template +static void WriteFog(T& out) { - if(bpmem.fog.c_proj_fsel.fsel == 0)return;//no Fog + if(bpmem.fog.c_proj_fsel.fsel == 0) + return; //no Fog if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective // ze = A/(B - (Zs >> B_SHF) - WRITE (p, " float ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); + out.Write(" float ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - WRITE (p, " float ze = " I_FOG"[1].x * zCoord;\n"); + out.Write(" float ze = " I_FOG"[1].x * zCoord;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust - //this is complitly teorical as the real hard seems to use a table intead of calculate the values. + // this is completely theoretical as the real hardware seems to use a table intead of calculating the values. 
if(bpmem.fogRange.Base.Enabled) { - WRITE (p, " float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); - WRITE (p, " x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); - WRITE (p, " ze *= x_adjust;\n"); + out.Write(" float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); + out.Write(" x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); + out.Write(" ze *= x_adjust;\n"); } - WRITE (p, " float fog = saturate(ze - " I_FOG"[1].z);\n"); + out.Write("float fog = saturate(ze - " I_FOG"[1].z);\n"); if(bpmem.fog.c_proj_fsel.fsel > 3) { - WRITE(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); + out.Write("%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]); } else { @@ -1233,7 +1037,15 @@ static void WriteFog(char *&p) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); } - WRITE(p, " prev.rgb = lerp(prev.rgb," I_FOG"[0].rgb,fog);\n"); - - + out.Write(" prev.rgb = lerp(prev.rgb," I_FOG"[0].rgb,fog);\n"); +} + +void GetPixelShaderId(PixelShaderUid& object, DSTALPHA_MODE dst_alpha_mode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dst_alpha_mode, ApiType, components); +} + +void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dst_alpha_mode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dst_alpha_mode, ApiType, components); } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 9c8bfce256..61be98aa6e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -19,6 +19,7 @@ #define GCOGL_PIXELSHADER_H #include "VideoCommon.h" +#include "ShaderGenCommon.h" #define I_COLORS "color" #define I_KCOLORS "k" @@ -44,66 +45,6 @@ #define C_PLIGHTS (C_FOG + 3) #define C_PMATERIALS (C_PLIGHTS + 40) #define C_PENVCONST_END (C_PMATERIALS + 4) -#define 
PIXELSHADERUID_MAX_VALUES 70 -#define PIXELSHADERUID_MAX_VALUES_SAFE 115 - -// DO NOT make anything in this class virtual. -template -class _PIXELSHADERUID -{ -public: - u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES]; - int num_values; - - _PIXELSHADERUID() - { - } - - _PIXELSHADERUID(const _PIXELSHADERUID& r) - { - num_values = r.num_values; - if (safe) memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE); - else memcpy(values, r.values, r.GetNumValues() * sizeof(values[0])); - } - - int GetNumValues() const - { - if (safe) return (sizeof(values) / sizeof(u32)); - else return num_values; - } - - bool operator <(const _PIXELSHADERUID& _Right) const - { - int N = GetNumValues(); - if (N < _Right.GetNumValues()) - return true; - else if (N > _Right.GetNumValues()) - return false; - for (int i = 0; i < N; ++i) - { - if (values[i] < _Right.values[i]) - return true; - else if (values[i] > _Right.values[i]) - return false; - } - return false; - } - - bool operator ==(const _PIXELSHADERUID& _Right) const - { - int N = GetNumValues(); - if (N != _Right.GetNumValues()) - return false; - for (int i = 0; i < N; ++i) - { - if (values[i] != _Right.values[i]) - return false; - } - return true; - } -}; -typedef _PIXELSHADERUID PIXELSHADERUID; -typedef _PIXELSHADERUID PIXELSHADERUIDSAFE; // Different ways to achieve rendering with destination alpha enum DSTALPHA_MODE @@ -113,12 +54,53 @@ enum DSTALPHA_MODE DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending }; -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +enum ALPHA_PRETEST_RESULT +{ + ALPHAPT_UNDEFINED, // AlphaTest Result is not defined + ALPHAPT_ALWAYSFAIL, // Alpha test alway Fail + ALPHAPT_ALWAYSPASS // Alpha test alway Pass +}; -void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components); -void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components); +struct 
pixel_shader_uid_data +{ + u32 components; + DSTALPHA_MODE dstAlphaMode; // TODO: as u32 :2 + ALPHA_PRETEST_RESULT Pretest; // TODO: As :2 + u32 nIndirectStagesUsed : 8; + struct { + u32 numtexgens : 4; + u32 numtevstages : 4; + u32 numindstages : 3; + } genMode; + u32 fogc_proj_fselfsel : 3; + struct + { + u32 unknown : 1; + u32 projection : 1; // XF_TEXPROJ_X + u32 inputform : 2; // XF_TEXINPUT_X + u32 texgentype : 3; // XF_TEXGEN_X + u32 sourcerow : 5; // XF_SRCGEOM_X + u32 embosssourceshift : 3; // what generated texcoord to use + u32 embosslightshift : 3; // light index that is used + } texMtxInfo[8]; + struct + { + u32 bi0 : 3; // indirect tex stage 0 ntexmap + u32 bc0 : 3; // indirect tex stage 0 ntexcoord + u32 bi1 : 3; + u32 bc1 : 3; + u32 bi2 : 3; + u32 bc3 : 3; + u32 bi4 : 3; + u32 bc4 : 3; + } tevindref; +}; -// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes -void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components); +typedef ShaderUid PixelShaderUid; +typedef ShaderCode PixelShaderCode; + + +void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +void GetPixelShaderId(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); #endif // GCOGL_PIXELSHADER_H diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 28fb8e8956..090fb24b57 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -44,7 +44,7 @@ public: // TODO: Store last frame used and order by that? makes much more sense anyway... 
bool operator < (const ShaderUid& obj) const { - for (int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) + for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) { if (this->values[i] < obj.values[i]) return true; @@ -91,4 +91,10 @@ private: char* write_ptr; }; +enum GenOutput +{ + GO_ShaderCode, + GO_ShaderUid, +}; + #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index acc89c6a54..db4946a49f 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -28,11 +28,6 @@ static char text[16768]; -enum GenOutput -{ - GO_ShaderCode, - GO_ShaderUid, -}; // TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? template void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ deleted file mode 100644 index be57103918..0000000000 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp~ +++ /dev/null @@ -1,645 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. 
-// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include -#include - -#include "NativeVertexFormat.h" - -#include "BPMemory.h" -#include "CPMemory.h" -#include "LightingShaderGen.h" -#include "VertexShaderGen.h" -#include "VideoConfig.h" - -static char text[16768]; - -enum GenOutput -{ - GO_ShaderCode, - GO_ShaderUid, -}; -// TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? -template -void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) -{ - object.Write("struct VS_OUTPUT {\n"); - object.Write(" float4 pos : POSITION;\n"); - object.Write(" float4 colors_0 : COLOR0;\n"); - object.Write(" float4 colors_1 : COLOR1;\n"); - - if (xfregs.numTexGen.numTexGens < 7) - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - object.Write(" float3 tex%d : TEXCOORD%d;\n", i, i); - - object.Write(" float4 clipPos : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens); -/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) -/// object.Write(" float4 Normal : TEXCOORD%d;\n", xfregs.numTexGen.numTexGens + 1); - } - else - { - // clip position is in w of first 4 texcoords -/// if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) -/// { -/// for (int i = 0; i < 8; ++i) -/// object.Write(" float4 tex%d : TEXCOORD%d;\n", i, i); -/// } -/// else - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - object.Write(" float%d tex%d : TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, i); - } - } - object.Write("};\n"); -} - -template -void _GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) -{ -#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; - const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; - const char* swizzle = "xyzw"; - if (coloralpha == 1 ) swizzle = "xyz"; - else if (coloralpha == 2 ) swizzle = "w"; - - SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); - SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); - if (!(chan.attnfunc & 1)) { - // atten disabled - switch (chan.diffusefunc) { - case LIGHTDIF_NONE: - object.Write("lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - object.Write("ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); - object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index, swizzle); - break; - default: _assert_(0); - } - } - else { // spec and spot - - if (chan.attnfunc == 3) - { // spot - object.Write("ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); - object.Write("dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); - } - else if (chan.attnfunc == 1) - { // specular - object.Write("ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); - object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); - } - - switch (chan.diffusefunc) - { - case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, - chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", - lightsName, - index, - swizzle); - break; - default: _assert_(0); - } - } - object.Write("\n"); -} - -// vertex shader -// lights/colors -// materials name is I_MATERIALS in vs and I_PMATERIALS in ps -// inColorName is color in vs and colors_ in ps -// dest is o.colors_ in vs and colors_ in ps -template -void _GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) -{ - for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) - { - const LitChannel& color = xfregs.color[j]; - const LitChannel& alpha = xfregs.alpha[j]; - - object.Write("{\n"); - - SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); - if (color.matsource) {// from vertex - if (components & (VB_HAS_COL0 << j)) - object.Write("mat = %s%d;\n", inColorName, j); - else if (components & VB_HAS_COL0) - object.Write("mat = %s0;\n", inColorName); - else - object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - else // from color - object.Write("mat = %s.C%d;\n", materialsName, j+2); - - SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); - if (color.enablelighting) { - SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); - if (color.ambsource) { // from vertex - if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); - } - } - } - } - - 
// no shared lights - for (int i = 0; i < 8; ++i) - { - if (!(mask&(1<(object, i, j, lightsName, 1); - if (!(mask&(1<(object, i, j+2, lightsName, 2); - } - } - else if (color.enablelighting || alpha.enablelighting) - { - // lights are disabled on one channel so process only the active ones - const LitChannel& workingchannel = color.enablelighting ? color : alpha; - const int lit_index = color.enablelighting ? j : (j+2); - int coloralpha = color.enablelighting ? 1 : 2; - - SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); - for (int i = 0; i < 8; ++i) - { - if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); - } - } - object.Write("%s%d = mat * saturate(lacc);\n", dest, j); - object.Write("}\n"); - } -} - -// TODO: Problem: this one uses copy constructors or sth for uids when returning... -template -void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) -{ -#undef SetUidField -#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; - - if (type == GO_ShaderCode) - { - out.SetBuffer(text); - setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - } - - /// text[sizeof(text) - 1] = 0x7C; // canary - - bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); - u32 lightMask = 0; - if (xfregs.numChan.numColorChans > 0) - lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); - if (xfregs.numChan.numColorChans > 1) - lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); - - out.Write("//Vertex Shader: comp:%x, \n", components); - out.Write("typedef struct { float4 T0, T1, T2; float4 N0, N1, N2; } s_" I_POSNORMALMATRIX";\n" - "typedef struct { float4 t; } FLT4;\n" - "typedef struct { FLT4 T[24]; } s_" I_TEXMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_" I_TRANSFORMMATRICES";\n" - "typedef struct { FLT4 T[32]; } s_" I_NORMALMATRICES";\n" - "typedef struct { FLT4 T[64]; } s_" 
I_POSTTRANSFORMMATRICES";\n" - "typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n" - "typedef struct { Light lights[8]; } s_" I_LIGHTS";\n" - "typedef struct { float4 C0, C1, C2, C3; } s_" I_MATERIALS";\n" - "typedef struct { float4 T0, T1, T2, T3; } s_" I_PROJECTION";\n" - ); - -/// p = GenerateVSOutputStruct(p, components, api_type); - GenerateVSOutputStruct(out, components, api_type); - - // uniforms - - out.Write("uniform s_" I_TRANSFORMMATRICES" " I_TRANSFORMMATRICES" : register(c%d);\n", C_TRANSFORMMATRICES); - out.Write("uniform s_" I_TEXMATRICES" " I_TEXMATRICES" : register(c%d);\n", C_TEXMATRICES); - out.Write("uniform s_" I_NORMALMATRICES" " I_NORMALMATRICES" : register(c%d);\n", C_NORMALMATRICES); - out.Write("uniform s_" I_POSNORMALMATRIX" " I_POSNORMALMATRIX" : register(c%d);\n", C_POSNORMALMATRIX); - out.Write("uniform s_" I_POSTTRANSFORMMATRICES" " I_POSTTRANSFORMMATRICES" : register(c%d);\n", C_POSTTRANSFORMMATRICES); - out.Write("uniform s_" I_LIGHTS" " I_LIGHTS" : register(c%d);\n", C_LIGHTS); - out.Write("uniform s_" I_MATERIALS" " I_MATERIALS" : register(c%d);\n", C_MATERIALS); - out.Write("uniform s_" I_PROJECTION" " I_PROJECTION" : register(c%d);\n", C_PROJECTION); - out.Write("uniform float4 " I_DEPTHPARAMS" : register(c%d);\n", C_DEPTHPARAMS); - - out.Write("VS_OUTPUT main(\n"); - - SetUidField(numTexGens, xfregs.numTexGen.numTexGens); - SetUidField(components, components); - // inputs - if (components & VB_HAS_NRM0) - out.Write(" float3 rawnorm0 : NORMAL0,\n"); - if (components & VB_HAS_NRM1) - { - if (is_d3d) - out.Write(" float3 rawnorm1 : NORMAL1,\n"); - else - out.Write(" float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); - } - if (components & VB_HAS_NRM2) - { - if (is_d3d) - out.Write(" float3 rawnorm2 : NORMAL2,\n"); - else - out.Write(" float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); - } - if (components & VB_HAS_COL0) - out.Write(" float4 color0 : COLOR0,\n"); - if (components & 
VB_HAS_COL1) - out.Write(" float4 color1 : COLOR1,\n"); - for (int i = 0; i < 8; ++i) { - u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<= 32 ? (posmtx-32) : posmtx;\n"); - out.Write("float3 N0 = " I_NORMALMATRICES".T[normidx].t.xyz, N1 = " I_NORMALMATRICES".T[normidx+1].t.xyz, N2 = " I_NORMALMATRICES".T[normidx+2].t.xyz;\n"); - } - - if (components & VB_HAS_NRM0) - out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) - out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); - if (components & VB_HAS_NRM2) - out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); - } - else - { - out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX".T0, rawpos), dot(" I_POSNORMALMATRIX".T1, rawpos), dot(" I_POSNORMALMATRIX".T2, rawpos), 1.0f);\n"); - if (components & VB_HAS_NRM0) - out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm0), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm0)));\n"); - if (components & VB_HAS_NRM1) - out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm1), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm1));\n"); - if (components & VB_HAS_NRM2) - out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX".N0.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N1.xyz, rawnorm2), dot(" I_POSNORMALMATRIX".N2.xyz, rawnorm2));\n"); - } - - if (!(components & VB_HAS_NRM0)) - out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); - - - - out.Write("o.pos = float4(dot(" I_PROJECTION".T0, pos), dot(" I_PROJECTION".T1, pos), dot(" I_PROJECTION".T2, pos), dot(" I_PROJECTION".T3, pos));\n"); - - out.Write("float4 mat, lacc;\n" - "float3 ldir, h;\n" - "float dist, dist2, attn;\n"); - - SetUidField(numColorChans, xfregs.numChan.numColorChans); - if(xfregs.numChan.numColorChans == 0) - 
{ - if (components & VB_HAS_COL0) - out.Write("o.colors_0 = color0;\n"); - else - out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - - // TODO: This probably isn't necessary if pixel lighting is enabled. - _GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); - - if(xfregs.numChan.numColorChans < 2) - { - if (components & VB_HAS_COL1) - out.Write("o.colors_1 = color1;\n"); - else - out.Write("o.colors_1 = o.colors_0;\n"); - } - // special case if only pos and tex coord 0 and tex coord input is AB11 - // donko - this has caused problems in some games. removed for now. - bool texGenSpecialCase = false; - /*bool texGenSpecialCase = - ((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 - (g_VtxDesc.Tex0Coord != NOT_PRESENT) && - (xfregs.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); - */ - - // transform texcoords - out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; - - out.Write("{\n"); - out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - SetUidField(texMtxInfo[i].sourcerow, xfregs.texMtxInfo[i].sourcerow); - switch (texinfo.sourcerow) { - case XF_SRCGEOM_INROW: - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = rawpos;\n"); // pos.w is 1 - break; - case XF_SRCNORMAL_INROW: - if (components & VB_HAS_NRM0) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n"); - } - break; - case XF_SRCCOLORS_INROW: - _assert_( texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1 ); - break; - case XF_SRCBINORMAL_T_INROW: - if (components & VB_HAS_NRM1) { - _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n"); - } - break; - case XF_SRCBINORMAL_B_INROW: - if (components & VB_HAS_NRM2) { - _assert_( 
texinfo.inputform == XF_TEXINPUT_ABC1 ); - out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n"); - } - break; - default: - _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); - if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) - out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); - break; - } - - // first transformation - SetUidField(texMtxInfo[i].texgentype, xfregs.texMtxInfo[i].texgentype); - switch (texinfo.texgentype) { - case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map - - if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { - // transform the light dir into tangent space - SetUidField(texMtxInfo[i].embosslightshift, xfregs.texMtxInfo[i].embosslightshift); - SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); - out.Write("ldir = normalize(" I_LIGHTS".lights[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); - out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); - } - else - { - _assert_(0); // should have normals - SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); - out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); - } - - break; - case XF_TEXGEN_COLOR_STRGBC0: - _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); - break; - case XF_TEXGEN_COLOR_STRGBC1: - _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); - out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); - break; - case XF_TEXGEN_REGULAR: - default: - SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); - if (components & (VB_HAS_TEXMTXIDX0<(object, components, api_type); -} - -void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) -{ - GenerateVertexShader(object, components, 
api_type); -} diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 32d3eff91b..e2c96b972a 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -51,7 +51,7 @@ #define C_VENVCONST_END (C_DEPTHPARAMS + 4) // TODO: Need packing? -struct uid_data +struct vertex_shader_uid_data { u32 components; u32 numColorChans : 2; @@ -82,8 +82,8 @@ struct uid_data } lit_chans[4]; }; -typedef ShaderUid VertexShaderUid; -typedef ShaderCode VertexShaderCode; +typedef ShaderUid VertexShaderUid; +typedef ShaderCode VertexShaderCode; void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h~ b/Source/Core/VideoCommon/Src/VertexShaderGen.h~ index 050ed76649..32d3eff91b 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h~ +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h~ @@ -82,7 +82,7 @@ struct uid_data } lit_chans[4]; }; -typedef ShaderUid VertexShaderUid; +typedef ShaderUid VertexShaderUid; typedef ShaderCode VertexShaderCode; void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 4364bea6ea..7fef0cac3c 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -144,7 +144,7 @@ struct VideoConfig int iAdapter; // Debugging - bool bEnableShaderDebugging; + bool bEnableShaderDebugging; // TODO: Obsolete? 
// Static config per API // TODO: Move this out of VideoConfig diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index 61e41008b3..cdd5d6e044 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -39,13 +39,13 @@ static int s_nMaxPixelInstructions; static GLuint s_ColorMatrixProgram = 0; static GLuint s_DepthMatrixProgram = 0; PixelShaderCache::PSCache PixelShaderCache::PixelShaders; -PIXELSHADERUID PixelShaderCache::s_curuid; +PixelShaderUid PixelShaderCache::s_curuid; bool PixelShaderCache::s_displayCompileAlert; GLuint PixelShaderCache::CurrentShader; bool PixelShaderCache::ShaderEnabled; PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry = NULL; -PIXELSHADERUID PixelShaderCache::last_uid; +PixelShaderUid PixelShaderCache::last_uid; GLuint PixelShaderCache::GetDepthMatrixProgram() { @@ -183,16 +183,15 @@ void PixelShaderCache::Shutdown() FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { - PIXELSHADERUID uid; - GetPixelShaderId(&uid, dstAlphaMode, components); - + PixelShaderUid uid; + GetPixelShaderId(uid, dstAlphaMode, API_OPENGL, components); + // Check if the shader is already set if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(API_OPENGL, last_entry->safe_uid, last_entry->shader.strprog, dstAlphaMode, components); return &last_entry->shader; } } @@ -206,19 +205,18 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, dstAlphaMode, components); return &last_entry->shader; } // Make an entry in the table PSCacheEntry& newentry = PixelShaders[uid]; last_entry = &newentry; - const char *code = 
GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components); + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_OPENGL, components); - if (g_ActiveConfig.bEnableShaderDebugging && code) + if (g_ActiveConfig.bEnableShaderDebugging) { - GetSafePixelShaderId(&newentry.safe_uid, dstAlphaMode, components); - newentry.shader.strprog = code; + newentry.shader.strprog = code.GetBuffer(); } #if defined(_DEBUG) || defined(DEBUGFAST) @@ -227,11 +225,11 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp char szTemp[MAX_PATH]; sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, code); + SaveData(szTemp, code.GetBuffer()); /// XXX } #endif - if (!code || !CompilePixelShader(newentry.shader, code)) { + if (!code.GetBuffer() || !CompilePixelShader(newentry.shader, code.GetBuffer())) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return NULL; } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h index f528eeb35b..aff1b49fcb 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h @@ -53,20 +53,19 @@ class PixelShaderCache { shader.Destroy(); } - PIXELSHADERUIDSAFE safe_uid; }; - typedef std::map PSCache; + typedef std::map PSCache; static PSCache PixelShaders; - static PIXELSHADERUID s_curuid; // the current pixel shader uid (progressively changed as memory is written) + static PixelShaderUid s_curuid; // the current pixel shader uid (progressively changed as memory is written) static bool s_displayCompileAlert; static GLuint CurrentShader; static PSCacheEntry* last_entry; - static PIXELSHADERUID last_uid; + static PixelShaderUid last_uid; static bool ShaderEnabled; From b519d371286e8c5fa614167cc0652e16789a699c Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 7 Aug 2012 01:16:02 +0200 Subject: [PATCH 04/54] Move new lighting shader uids 
to LightingShaderGen.h --- .../VideoCommon/Src/LightingShaderGen.cpp | 195 ----------------- .../Core/VideoCommon/Src/LightingShaderGen.h | 204 +++++++++++++++++- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 4 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 199 +---------------- 4 files changed, 204 insertions(+), 398 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp index f32e5dfeee..963b2c529a 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp @@ -18,198 +18,3 @@ #include "LightingShaderGen.h" #include "NativeVertexFormat.h" #include "XFMemory.h" - -#define WRITE p+=sprintf - -int GetLightingShaderId(u32* out) -{ - for (u32 i = 0; i < xfregs.numChan.numColorChans; ++i) - { - out[i] = xfregs.color[i].enablelighting ? - (u32)xfregs.color[i].hex : - (u32)xfregs.color[i].matsource; - out[i] |= (xfregs.alpha[i].enablelighting ? - (u32)xfregs.alpha[i].hex : - (u32)xfregs.alpha[i].matsource) << 15; - } - _assert_(xfregs.numChan.numColorChans <= 2); - return xfregs.numChan.numColorChans; -} - -// coloralpha - 1 if color, 2 if alpha -char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha) -{ - const char* swizzle = "xyzw"; - if (coloralpha == 1 ) swizzle = "xyz"; - else if (coloralpha == 2 ) swizzle = "w"; - - if (!(chan.attnfunc & 1)) { - // atten disabled - switch (chan.diffusefunc) { - case LIGHTDIF_NONE: - WRITE(p, "lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - WRITE(p, "ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); - WRITE(p, "lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0f," :"(", lightsName, index, swizzle); - break; - default: _assert_(0); - } - } - else { // spec and spot - - if (chan.attnfunc == 3) - { // spot - WRITE(p, "ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); - WRITE(p, "dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); - WRITE(p, "attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); - } - else if (chan.attnfunc == 1) - { // specular - WRITE(p, "ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); - WRITE(p, "attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); - WRITE(p, "attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); - } - - switch (chan.diffusefunc) - { - case LIGHTDIF_NONE: - WRITE(p, "lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - WRITE(p, "lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, - chan.diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0f," :"(", - lightsName, - index, - swizzle); - break; - default: _assert_(0); - } - } - WRITE(p, "\n"); - return p; -} - -// vertex shader -// lights/colors -// materials name is I_MATERIALS in vs and I_PMATERIALS in ps -// inColorName is color in vs and colors_ in ps -// dest is o.colors_ in vs and colors_ in ps -char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) -{ - for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) - { - const LitChannel& color = xfregs.color[j]; - const LitChannel& alpha = xfregs.alpha[j]; - - WRITE(p, "{\n"); - - if (color.matsource) {// from vertex - if (components & (VB_HAS_COL0 << j)) - WRITE(p, "mat = %s%d;\n", inColorName, j); - else if (components & VB_HAS_COL0) - WRITE(p, "mat = %s0;\n", inColorName); - else - WRITE(p, "mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - else // from color - WRITE(p, "mat = %s.C%d;\n", materialsName, j+2); - - if (color.enablelighting) { - if (color.ambsource) { // from vertex - if (components & (VB_HAS_COL0< +void GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) +{ +#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; + const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; + const char* swizzle = "xyzw"; + if (coloralpha == 1 ) swizzle = "xyz"; + else if (coloralpha == 2 ) swizzle = "w"; + + SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); + SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); + if (!(chan.attnfunc & 1)) { + // atten disabled + switch (chan.diffusefunc) { + case LIGHTDIF_NONE: + object.Write("lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); + object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index, swizzle); + break; + default: _assert_(0); + } + } + else { // spec and spot + + if (chan.attnfunc == 3) + { // spot + object.Write("ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); + object.Write("dist2 = dot(ldir, ldir);\n" + "dist = sqrt(dist2);\n" + "ldir = ldir / dist;\n" + "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); + object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); + } + else if (chan.attnfunc == 1) + { // specular + object.Write("ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); + object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); + object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); + } + + switch (chan.diffusefunc) + { + case LIGHTDIF_NONE: + object.Write("lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + break; + case LIGHTDIF_SIGN: + case LIGHTDIF_CLAMP: + object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", + swizzle, + chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", + lightsName, + index, + swizzle); + break; + default: _assert_(0); + } + } + object.Write("\n"); +} + +// vertex shader +// lights/colors +// materials name is I_MATERIALS in vs and I_PMATERIALS in ps +// inColorName is color in vs and colors_ in ps +// dest is o.colors_ in vs and colors_ in ps +template +void GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +{ + for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) + { + const LitChannel& color = xfregs.color[j]; + const LitChannel& alpha = xfregs.alpha[j]; + + object.Write("{\n"); + + SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); + if (color.matsource) {// from vertex + if (components & (VB_HAS_COL0 << j)) + object.Write("mat = %s%d;\n", inColorName, j); + else if (components & VB_HAS_COL0) + object.Write("mat = %s0;\n", inColorName); + else + object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); + } + else // from color + object.Write("mat = %s.C%d;\n", materialsName, j+2); + + SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); + if (color.enablelighting) { + SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); + if (color.ambsource) { // from vertex + if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); + } + } + } + } + + 
// no shared lights + for (int i = 0; i < 8; ++i) + { + if (!(mask&(1<(object, i, j, lightsName, 1); + if (!(mask&(1<(object, i, j+2, lightsName, 2); + } + } + else if (color.enablelighting || alpha.enablelighting) + { + // lights are disabled on one channel so process only the active ones + const LitChannel& workingchannel = color.enablelighting ? color : alpha; + const int lit_index = color.enablelighting ? j : (j+2); + int coloralpha = color.enablelighting ? 1 : 2; + + SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); + for (int i = 0; i < 8; ++i) + { + if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); + } + } + object.Write("%s%d = mat * saturate(lacc);\n", dest, j); + object.Write("}\n"); + } +} +#undef SetUidField #endif // _LIGHTINGSHADERGEN_H_ diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 5ee95848b5..d52ed1ac0a 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -440,8 +440,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u { if (nIndirectStagesUsed & (1< -void _GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) -{ -#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; - const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; - const char* swizzle = "xyzw"; - if (coloralpha == 1 ) swizzle = "xyz"; - else if (coloralpha == 2 ) swizzle = "w"; - SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); - SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); - if (!(chan.attnfunc & 1)) { - // atten disabled - switch (chan.diffusefunc) { - case LIGHTDIF_NONE: - object.Write("lacc.%s += %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - object.Write("ldir = normalize(%s.lights[%d].pos.xyz - pos.xyz);\n", lightsName, index); - object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index, swizzle); - break; - default: _assert_(0); - } - } - else { // spec and spot - - if (chan.attnfunc == 3) - { // spot - object.Write("ldir = %s.lights[%d].pos.xyz - pos.xyz;\n", lightsName, index); - object.Write("dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s.lights[%d].dir.xyz));\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); - } - else if (chan.attnfunc == 1) - { // specular - object.Write("ldir = normalize(%s.lights[%d].pos.xyz);\n", lightsName, index); - object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s.lights[%d].dir.xyz)) : 0.0f;\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s.lights[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s.lights[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); - } - - switch (chan.diffusefunc) - { - case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * %s.lights[%d].col.%s;\n", swizzle, lightsName, index, swizzle); - break; - case LIGHTDIF_SIGN: - case LIGHTDIF_CLAMP: - object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s.lights[%d].col.%s;\n", - swizzle, - chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", - lightsName, - index, - swizzle); - break; - default: _assert_(0); - } - } - object.Write("\n"); -} - -// vertex shader -// lights/colors -// materials name is I_MATERIALS in vs and I_PMATERIALS in ps -// inColorName is color in vs and colors_ in ps -// dest is o.colors_ in vs and colors_ in ps -template -void _GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) -{ - for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) - { - const LitChannel& color = xfregs.color[j]; - const LitChannel& alpha = xfregs.alpha[j]; - - object.Write("{\n"); - - SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); - if (color.matsource) {// from vertex - if (components & (VB_HAS_COL0 << j)) - object.Write("mat = %s%d;\n", inColorName, j); - else if (components & VB_HAS_COL0) - object.Write("mat = %s0;\n", inColorName); - else - object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); - } - else // from color - object.Write("mat = %s.C%d;\n", materialsName, j+2); - - SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); - if (color.enablelighting) { - SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); - if (color.ambsource) { // from vertex - if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); - } - } - } - } - - 
// no shared lights - for (int i = 0; i < 8; ++i) - { - if (!(mask&(1<(object, i, j, lightsName, 1); - if (!(mask&(1<(object, i, j+2, lightsName, 2); - } - } - else if (color.enablelighting || alpha.enablelighting) - { - // lights are disabled on one channel so process only the active ones - const LitChannel& workingchannel = color.enablelighting ? color : alpha; - const int lit_index = color.enablelighting ? j : (j+2); - int coloralpha = color.enablelighting ? 1 : 2; - - SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); - for (int i = 0; i < 8; ++i) - { - if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); - } - } - object.Write("%s%d = mat * saturate(lacc);\n", dest, j); - object.Write("}\n"); - } -} - -// TODO: Problem: this one uses copy constructors or sth for uids when returning... template void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { @@ -414,7 +217,7 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) } // TODO: This probably isn't necessary if pixel lighting is enabled. - _GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); if(xfregs.numChan.numColorChans < 2) { From 8902c6e38b68ab3327fd25183425c37c3445e9d1 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 7 Aug 2012 14:36:56 +0200 Subject: [PATCH 05/54] Some cleanups, add more fields to pixel shader uid. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 73 ++++++++++++------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 59 +++++++++++++++ Source/Core/VideoCommon/Src/ShaderGenCommon.h | 1 + 3 files changed, 107 insertions(+), 26 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index d52ed1ac0a..9aad5556aa 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -243,7 +243,7 @@ static void BuildSwapModeTable() swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2]; swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1]; swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2]; - swapModeTable[i][4] = 0; + swapModeTable[i][4] = '\0'; } } @@ -251,6 +251,7 @@ template void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { #define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; +#define OR_UidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name |= value; }; if (type == GO_ShaderCode) { setlocale(LC_NUMERIC, "C"); // Reset locale for compilation @@ -258,12 +259,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } /// text[sizeof(text) - 1] = 0x7C; // canary - /// TODO: Uids! 
- BuildSwapModeTable(); // Needed for WriteStage unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; - char *p = text; out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", numStages, numTexgen/*, bpmem.genMode.numindstages*/); @@ -276,31 +274,17 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u SetUidField(genMode.numtexgens, bpmem.genMode.numtexgens); // Declare samplers - if(ApiType != API_D3D11) - { - out.Write("uniform sampler2D "); - } - else - { - out.Write("sampler "); - } - - bool bfirst = true; + out.Write((ApiType != API_D3D11) ? "uniform sampler2D " : "sampler "); for (int i = 0; i < 8; ++i) - { - out.Write("%s samp%d : register(s%d)", bfirst?"":",", i, i); - bfirst = false; - } + out.Write("%s samp%d : register(s%d)", (i==0)?"":",", i, i); + out.Write(";\n"); if(ApiType == API_D3D11) { out.Write("Texture2D "); - bfirst = true; for (int i = 0; i < 8; ++i) - { - out.Write("%s Tex%d : register(t%d)", bfirst?"":",", i, i); - bfirst = false; - } + out.Write("%s Tex%d : register(t%d)", (i==0)?"":",", i, i); + out.Write(";\n"); } @@ -484,6 +468,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u RegisterStates[i].AuxStored = false; } + BuildSwapModeTable(); // Uids set in WriteStage for (unsigned int i = 0; i < numStages; i++) WriteStage(out, i, ApiType); //build the equation for this stage @@ -491,6 +476,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u { // The results of the last texenv stage are put onto the screen, // regardless of the used destination register + SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); + SetUidField(combiners[numStages-1].alphaC.dest, bpmem.combiners[numStages-1].alphaC.dest); if(bpmem.combiners[numStages - 1].colorC.dest != 0) { /// SetUidField(combiners[numStages-1].colorC.dest, 
bpmem.combiners[numStages-1].colorC.dest); @@ -614,8 +601,15 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) out.Write("// TEV stage %d\n", n); + OR_UidField(bHasIndStage, bHasIndStage << n); + if (n < 8) { OR_UidField(tevorders_n_texcoord1, texcoord << (3 * n)); } + else OR_UidField(tevorders_n_texcoord2, texcoord << (3 * n - 24)); if (bHasIndStage) { + OR_UidField(tevind_n_bs, bpmem.tevind[n].bs << (2*n)); + OR_UidField(tevind_n_bt, bpmem.tevind[n].bt << (2*n)); + OR_UidField(tevind_n_fmt, bpmem.tevind[n].fmt << (2*n)); + out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) @@ -629,17 +623,21 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias + if (n < 8) { OR_UidField(tevind_n_bias1, bpmem.tevind[n].bias << (3*n)); } /// XXX: brackets? + else OR_UidField(tevind_n_bias2, bpmem.tevind[n].bias << (3*n - 24)); if (bpmem.tevind[n].bias != ITB_NONE ) out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale + if (n < 8) { OR_UidField(tevind_n_mid1, bpmem.tevind[n].mid << (4*n)); } /// XXX: brackets? 
+ else OR_UidField(tevind_n_mid2, bpmem.tevind[n].mid << (4*n - 32)); if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1); out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", - n, mtxidx, n, mtxidx+1, n); + n, mtxidx, n, mtxidx+1, n); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix @@ -663,6 +661,11 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) // Wrapping // --------- + if (n < 8) { OR_UidField(tevorders_n_sw1, bpmem.tevind[n].sw << (3 * n)); } + else OR_UidField(tevorders_n_sw2, bpmem.tevind[n].sw << (3 * n - 24)); + if (n < 8) { OR_UidField(tevorders_n_tw1, bpmem.tevind[n].tw << (3 * n)); } + else OR_UidField(tevorders_n_tw2, bpmem.tevind[n].tw << (3 * n - 24)); + // wrap S if (bpmem.tevind[n].sw == ITW_OFF) out.Write("wrappedcoord.x = uv%d.x;\n", texcoord); @@ -695,6 +698,12 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) || ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { + const int i = bpmem.combiners[n].alphaC.rswap; + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap1 << (i*2)); + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap1 << (i*2 + 1)); + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap2 << (i*2 + 16)); + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap2 << (i*2 + 17)); + char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); out.Write("crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); @@ -712,6 +721,12 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) out.Write("tevcoord.xy = float2(0.0f, 0.0f);\n"); } + const int i = bpmem.combiners[n].alphaC.tswap; + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap1 << (i*2)); + OR_UidField(tevksel_n_swap, 
bpmem.tevksel[i*2+1].swap1 << (i*2 + 1)); + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap2 << (i*2 + 16)); + OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap2 << (i*2 + 17)); + char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); @@ -950,13 +965,19 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode) // using discard then return works the same in cg and dx9 but not in dx11 out.Write("if(!( "); + SetUidField(alpha_test.comp0, bpmem.alpha_test.comp0); + SetUidField(alpha_test.comp1, bpmem.alpha_test.comp1); + SetUidField(alpha_test.logic, bpmem.alpha_test.logic); + + // Lookup the first component from the alpha function table int compindex = bpmem.alpha_test.comp0; - out.Write(tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table + out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op + // Lookup the second component from the alpha function table compindex = bpmem.alpha_test.comp1; - out.Write(tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table + out.Write(tevAlphaFuncsTable[compindex], alphaRef[1]); out.Write(")) {\n"); out.Write("ocol0 = 0;\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 61be98aa6e..994f70c9ea 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -94,6 +94,65 @@ struct pixel_shader_uid_data u32 bi4 : 3; u32 bc4 : 3; } tevindref; + + u32 tevorders_n_texcoord1 : 24; // 8 x 3 bit + u32 tevorders_n_texcoord2 : 24; // 8 x 3 bit + u32 tevorders_n_sw1 : 24; // 8 x 3 bit + u32 tevorders_n_sw2 : 24; // 8 x 3 bit + u32 tevorders_n_tw1 : 24; // 8 x 3 bit + u32 tevorders_n_tw2 : 24; 
// 8 x 3 bit + + u32 tevind_n_bs : 32; // 16 x 2 bit + u32 tevind_n_fmt : 32; // 16 x 2 bit + u32 tevind_n_bt : 32; // 16 x 2 bit + u32 tevind_n_bias1 : 24; // 8 x 3 bit + u32 tevind_n_bias2 : 24; // 8 x 3 bit + u32 tevind_n_mid1 : 32; // 8 x 4 bit + u32 tevind_n_mid2 : 32; // 8 x 4 bit + + u32 tevksel_n_swap : 32; // 8 x 2 bit (swap1) + 8 x 2 bit (swap2) + struct + { + struct //abc=8bit,d=10bit + { + u32 d : 4; // TEVSELCC_X + u32 c : 4; // TEVSELCC_X + u32 b : 4; // TEVSELCC_X + u32 a : 4; // TEVSELCC_X + + u32 bias : 2; + u32 op : 1; + u32 clamp : 1; + + u32 shift : 2; + u32 dest : 2; //1,2,3 + } colorC; + struct + { + u32 rswap : 2; + u32 tswap : 2; + u32 d : 3; // TEVSELCA_ + u32 c : 3; // TEVSELCA_ + u32 b : 3; // TEVSELCA_ + u32 a : 3; // TEVSELCA_ + + u32 bias : 2; //GXTevBias + u32 op : 1; + u32 clamp : 1; + + u32 shift : 2; + u32 dest : 2; //1,2,3 + } alphaC; + } combiners[16]; + struct + { + u32 comp0 : 3; + u32 comp1 : 3; + u32 logic : 2; + // TODO: ref??? + } alpha_test; + + u32 bHasIndStage : 16; }; typedef ShaderUid PixelShaderUid; diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 090fb24b57..39fcf13d95 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -29,6 +29,7 @@ class ShaderUid public: ShaderUid() { + // TODO: Move to Shadergen => can be optimized out memset(values, 0, sizeof(values)); } From 67be1e939a15fd881565e38c63a590dc94473bc7 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 2 Sep 2012 14:31:37 +0200 Subject: [PATCH 06/54] PixelShaderGen: Some more work... 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 47 ++++++++++--------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 4 +- .../Plugin_VideoOGL/Src/PixelShaderCache.cpp | 2 +- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 9aad5556aa..8bfc3abd8a 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -40,8 +40,8 @@ template static void WriteStage(char *&p, int n, API_TYPE ApiType); template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); // static void WriteAlphaCompare(char *&p, int num, int comp); -template static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); -template static void WriteFog(char *&p); +template static void WriteAlphaTest(T& out, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); +template static void WriteFog(T& out); static const char *tevKSelTableC[] = // KCSEL { @@ -273,6 +273,16 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u SetUidField(genMode.numtevstages, bpmem.genMode.numtevstages); SetUidField(genMode.numtexgens, bpmem.genMode.numtexgens); + int nIndirectStagesUsed = 0; + if (bpmem.genMode.numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + // Declare samplers out.Write((ApiType != API_D3D11) ? 
"uniform sampler2D " : "sampler "); for (int i = 0; i < 8; ++i) @@ -287,7 +297,6 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write(";\n"); } - out.Write("\n"); out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); @@ -326,6 +335,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write(" in float4 colors_0 : COLOR0,\n"); out.Write(" in float4 colors_1 : COLOR1"); + // TODO: ... this looks like an incredibly ugly hack - is it still needed? // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { @@ -345,7 +355,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } else { - /// TODO: Set numTexGen used + SetUidField(xfregs_numTexGen_numTexGens, xfregs.numTexGen.numTexGens); for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i); } @@ -363,6 +373,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { + SetUidField(xfregs_numTexGen_numTexGens, xfregs.numTexGen.numTexGens); if (xfregs.numTexGen.numTexGens < 7) { out.Write("float3 _norm0 = normalize(Normal.xyz);\n\n"); @@ -378,7 +389,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("float4 mat, lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); - +/// TODO /// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } @@ -409,16 +420,6 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } // indirect texture map lookup - int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) - { - for (unsigned int i = 0; i < numStages; ++i) - { - /// Ignoring this for now, handled in WriteStage. 
- if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } SetUidField(nIndirectStagesUsed, nIndirectStagesUsed); for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { @@ -427,6 +428,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u unsigned int texcoord = bpmem.tevindref.getTexCoord(i); unsigned int texmap = bpmem.tevindref.getTexMap(i); + /// TODO: Cleanup... if (i == 0) { SetUidField(tevindref.bc0, texcoord); @@ -468,15 +470,16 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u RegisterStates[i].AuxStored = false; } - BuildSwapModeTable(); // Uids set in WriteStage + // Uid fields for BuildSwapModeTable are set in WriteStage + BuildSwapModeTable(); for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, i, ApiType); //build the equation for this stage + WriteStage(out, i, ApiType); // build the equation for this stage if(numStages) { // The results of the last texenv stage are put onto the screen, // regardless of the used destination register - SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); + SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); // TODO: These probably don't need to be set anymore here... 
SetUidField(combiners[numStages-1].alphaC.dest, bpmem.combiners[numStages-1].alphaC.dest); if(bpmem.combiners[numStages - 1].colorC.dest != 0) { @@ -1061,12 +1064,12 @@ static void WriteFog(T& out) out.Write(" prev.rgb = lerp(prev.rgb," I_FOG"[0].rgb,fog);\n"); } -void GetPixelShaderId(PixelShaderUid& object, DSTALPHA_MODE dst_alpha_mode, API_TYPE ApiType, u32 components) +void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - GeneratePixelShader(object, dst_alpha_mode, ApiType, components); + GeneratePixelShader(object, dstAlphaMode, ApiType, components); } -void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dst_alpha_mode, API_TYPE ApiType, u32 components) +void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - GeneratePixelShader(object, dst_alpha_mode, ApiType, components); + GeneratePixelShader(object, dstAlphaMode, ApiType, components); } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 994f70c9ea..86fdd5dd82 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -153,6 +153,8 @@ struct pixel_shader_uid_data } alpha_test; u32 bHasIndStage : 16; + + u32 xfregs_numTexGen_numTexGens : 4; }; typedef ShaderUid PixelShaderUid; @@ -160,6 +162,6 @@ typedef ShaderCode PixelShaderCode; void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); -void GetPixelShaderId(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); #endif // GCOGL_PIXELSHADER_H diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index cdd5d6e044..81e1547c71 100644 --- 
a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -184,7 +184,7 @@ void PixelShaderCache::Shutdown() FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { PixelShaderUid uid; - GetPixelShaderId(uid, dstAlphaMode, API_OPENGL, components); + GetPixelShaderUid(uid, dstAlphaMode, API_OPENGL, components); // Check if the shader is already set if (last_entry) From 700cce9588fcd53b016161690f0d4c1fa5ccf3f3 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 2 Sep 2012 18:30:21 +0200 Subject: [PATCH 07/54] More work on making new pixel shader uids work --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 19 +++-- Source/Core/VideoCommon/Src/PixelShaderGen.h | 76 ++++++++++++------- 2 files changed, 62 insertions(+), 33 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 8bfc3abd8a..01f7768fb1 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -39,7 +39,6 @@ template static void WriteStage(char *&p, int n, API_TYPE ApiType); template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -// static void WriteAlphaCompare(char *&p, int num, int comp); template static void WriteAlphaTest(T& out, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); template static void WriteFog(T& out); @@ -250,6 +249,7 @@ static void BuildSwapModeTable() template void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { + // TODO: Can be optimized if using alpha pass #define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; #define OR_UidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name |= value; }; if (type == GO_ShaderCode) @@ -664,10 +664,12 @@ static void WriteStage(T& out, int n, 
API_TYPE ApiType) // Wrapping // --------- - if (n < 8) { OR_UidField(tevorders_n_sw1, bpmem.tevind[n].sw << (3 * n)); } - else OR_UidField(tevorders_n_sw2, bpmem.tevind[n].sw << (3 * n - 24)); - if (n < 8) { OR_UidField(tevorders_n_tw1, bpmem.tevind[n].tw << (3 * n)); } - else OR_UidField(tevorders_n_tw2, bpmem.tevind[n].tw << (3 * n - 24)); + if (n < 8) { OR_UidField(tevind_n_sw1, bpmem.tevind[n].sw << (3 * n)); } + else OR_UidField(tevind_n_sw2, bpmem.tevind[n].sw << (3 * n - 24)); + if (n < 8) { OR_UidField(tevind_n_tw1, bpmem.tevind[n].tw << (3 * n)); } + else OR_UidField(tevind_n_tw2, bpmem.tevind[n].tw << (3 * n - 24)); + + OR_UidField(tevind_n_fb_addprev, bpmem.tevind[n].fb_addprev << n); // wrap S if (bpmem.tevind[n].sw == ITW_OFF) @@ -694,6 +696,9 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; + SetUidField(combiners[n].colorC.hex, cc.hex&0xFFFFFF); + SetUidField(combiners[n].alphaC.hex, ac.hex&0xFFFFFF); + if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC @@ -1023,9 +1028,12 @@ static const char *tevFogFuncsTable[] = template static void WriteFog(T& out) { + SetUidField(fog.fsel, bpmem.fog.c_proj_fsel.fsel); if(bpmem.fog.c_proj_fsel.fsel == 0) return; //no Fog + SetUidField(fog.proj, bpmem.fog.c_proj_fsel.proj); + if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective @@ -1042,6 +1050,7 @@ static void WriteFog(T& out) // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust // this is completely theoretical as the real hardware seems to use a table intead of calculating the values. 
+ SetUidField(fog.RangeBaseEnabled, bpmem.fogRange.Base.Enabled); if(bpmem.fogRange.Base.Enabled) { out.Write(" float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 86fdd5dd82..1476cb634d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -97,10 +97,11 @@ struct pixel_shader_uid_data u32 tevorders_n_texcoord1 : 24; // 8 x 3 bit u32 tevorders_n_texcoord2 : 24; // 8 x 3 bit - u32 tevorders_n_sw1 : 24; // 8 x 3 bit - u32 tevorders_n_sw2 : 24; // 8 x 3 bit - u32 tevorders_n_tw1 : 24; // 8 x 3 bit - u32 tevorders_n_tw2 : 24; // 8 x 3 bit + u32 tevind_n_sw1 : 24; // 8 x 3 bit + u32 tevind_n_sw2 : 24; // 8 x 3 bit + u32 tevind_n_tw1 : 24; // 8 x 3 bit + u32 tevind_n_tw2 : 24; // 8 x 3 bit + u32 tevind_n_fb_addprev : 16; // 16 x 1 bit u32 tevind_n_bs : 32; // 16 x 2 bit u32 tevind_n_fmt : 32; // 16 x 2 bit @@ -113,35 +114,41 @@ struct pixel_shader_uid_data u32 tevksel_n_swap : 32; // 8 x 2 bit (swap1) + 8 x 2 bit (swap2) struct { - struct //abc=8bit,d=10bit - { - u32 d : 4; // TEVSELCC_X - u32 c : 4; // TEVSELCC_X - u32 b : 4; // TEVSELCC_X - u32 a : 4; // TEVSELCC_X + union { + struct //abc=8bit,d=10bit + { + u32 d : 4; // TEVSELCC_X + u32 c : 4; // TEVSELCC_X + u32 b : 4; // TEVSELCC_X + u32 a : 4; // TEVSELCC_X - u32 bias : 2; - u32 op : 1; - u32 clamp : 1; + u32 bias : 2; + u32 op : 1; + u32 clamp : 1; - u32 shift : 2; - u32 dest : 2; //1,2,3 + u32 shift : 2; + u32 dest : 2; //1,2,3 + }; + u32 hex : 24; } colorC; - struct - { - u32 rswap : 2; - u32 tswap : 2; - u32 d : 3; // TEVSELCA_ - u32 c : 3; // TEVSELCA_ - u32 b : 3; // TEVSELCA_ - u32 a : 3; // TEVSELCA_ + union { + struct + { + u32 rswap : 2; + u32 tswap : 2; + u32 d : 3; // TEVSELCA_ + u32 c : 3; // TEVSELCA_ + u32 b : 3; // TEVSELCA_ + u32 a : 3; // TEVSELCA_ - u32 bias : 2; //GXTevBias - u32 op : 1; - u32 
clamp : 1; + u32 bias : 2; //GXTevBias + u32 op : 1; + u32 clamp : 1; - u32 shift : 2; - u32 dest : 2; //1,2,3 + u32 shift : 2; + u32 dest : 2; //1,2,3 + }; + u32 hex : 24; } alphaC; } combiners[16]; struct @@ -152,6 +159,17 @@ struct pixel_shader_uid_data // TODO: ref??? } alpha_test; + union { + struct + { + u32 proj : 1; // 0 - perspective, 1 - orthographic + u32 fsel : 3; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 - backward exp, 7 - backward exp2 + u32 RangeBaseEnabled : 1; + }; + u32 hex : 4; + } fog; + + u32 bHasIndStage : 16; u32 xfregs_numTexGen_numTexGens : 4; @@ -159,9 +177,11 @@ struct pixel_shader_uid_data typedef ShaderUid PixelShaderUid; typedef ShaderCode PixelShaderCode; +//typedef ShaderConstantProfile PixelShaderConstantProfile; void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +//void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); #endif // GCOGL_PIXELSHADER_H From 0fdeb81038d9ef599d4741035245972c3d374be7 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 2 Sep 2012 20:00:15 +0200 Subject: [PATCH 08/54] Add some code for generating a shader constant usage profile. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 62 +++++++++++- Source/Core/VideoCommon/Src/PixelShaderGen.h | 5 +- .../VideoCommon/Src/PixelShaderManager.cpp | 99 ++++++++++++------- .../Core/VideoCommon/Src/PixelShaderManager.h | 2 +- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 35 ++++++- .../Plugin_VideoOGL/Src/VertexManager.cpp | 2 +- 6 files changed, 163 insertions(+), 42 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 01f7768fb1..a19eb1a27e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -299,9 +299,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } out.Write("\n"); - out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); + out.Write("uniform float4 " I_COLORS"[4] : register(c%d);\n", C_COLORS); // TODO: first element not used?? out.Write("uniform float4 " I_KCOLORS"[4] : register(c%d);\n", C_KCOLORS); - out.Write("uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); + out.Write("uniform float4 " I_ALPHA"[1] : register(c%d);\n", C_ALPHA); // TODO: Why is this an array...-.- out.Write("uniform float4 " I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); out.Write("uniform float4 " I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); out.Write("uniform float4 " I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); @@ -390,6 +390,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u "float3 ldir, h;\n" "float dist, dist2, attn;\n"); /// TODO + out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further + out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); /// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } @@ -405,6 +407,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } else { + out.SetConstantsUsed(C_TEXDIMS, 
C_TEXDIMS+numTexgen-1); for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides @@ -450,7 +453,10 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u SetUidField(tevindref.bi4, texmap); } if (texcoord < numTexgen) + { + out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); out.Write("tempcoord = uv%d.xy * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy"); + } else out.Write("tempcoord = float2(0.0f, 0.0f);\n"); @@ -504,12 +510,14 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u WriteAlphaTest(out, ApiType, dstAlphaMode); // the screen space depth value = far z + (clip z / clip w) * z range + out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); // Note: depth textures are disabled if early depth test is enabled if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... + out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); @@ -521,7 +529,10 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) + { + out.SetConstantsUsed(C_ALPHA, C_ALPHA); out.Write(" ocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); + } else { WriteFog(out); @@ -532,6 +543,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u // single pass if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) { + out.SetConstantsUsed(C_ALPHA, C_ALPHA); // Colors will be blended against the alpha from ocol1... out.Write(" ocol1 = ocol0;\n"); // ...and the alpha from ocol0 will be written to the framebuffer. 
@@ -639,6 +651,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1); + out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", n, mtxidx, n, mtxidx+1, n); } @@ -646,12 +659,14 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); + out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); + out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); } else @@ -757,6 +772,10 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) { out.Write("ckonsttemp = konsttemp;\n"); } + if (kc > 7) + out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4)); + if (ka > 7) + out.SetConstantsUsed(C_KCOLORS+((ka-0xc)%4),C_KCOLORS+((ka-0xc)%4)); } if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV @@ -782,6 +801,8 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0) { + // TODO: WTF? 
+ out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) { out.Write("cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); @@ -800,6 +821,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1 || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1) { + out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) { out.Write("cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); @@ -818,6 +840,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2 || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2) { + out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) { out.Write("cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); @@ -834,6 +857,28 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); RegisterStates[cc.dest].AuxStored = false; +/* if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0) + { + out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); + // TODO: 11 bit signed overflow.. + } + if (cc.d == TEVCOLORARG_C1 || cc.d == TEVCOLORARG_A1 || ac.d == TEVALPHAARG_A1) + { + out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); + // TODO: 11 bit signed overflow.. + } + if (cc.d == TEVCOLORARG_C2 || cc.d == TEVCOLORARG_A2 || ac.d == TEVALPHAARG_A2) + { + out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); + // TODO: 11 bit signed overflow.. + }*/ + + // TODO: Are there enums for this? 
+ if (cc.dest >= 1 && cc.dest <= 3) + out.SetConstantsUsed(C_COLORS+cc.dest, C_COLORS+cc.dest); + if (ac.dest >= 1 && ac.dest <= 3) + out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); + out.Write("// color combine\n"); if (cc.clamp) out.Write("%s = saturate(", tevCOutputTable[cc.dest]); @@ -935,6 +980,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) template void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { + out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); if (ApiType == API_D3D11) out.Write("%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap); else @@ -968,7 +1014,9 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode) { I_ALPHA"[0].r", I_ALPHA"[0].g" - }; + }; + + out.SetConstantsUsed(C_ALPHA, C_ALPHA); // using discard then return works the same in cg and dx9 but not in dx11 out.Write("if(!( "); @@ -1034,6 +1082,7 @@ static void WriteFog(T& out) SetUidField(fog.proj, bpmem.fog.c_proj_fsel.proj); + out.SetConstantsUsed(C_FOG, C_FOG+1); if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective @@ -1053,6 +1102,7 @@ static void WriteFog(T& out) SetUidField(fog.RangeBaseEnabled, bpmem.fogRange.Base.Enabled); if(bpmem.fogRange.Base.Enabled) { + out.SetConstantsUsed(C_FOG+2, C_FOG+2); out.Write(" float x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n"); out.Write(" x_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); out.Write(" ze *= x_adjust;\n"); @@ -1082,3 +1132,9 @@ void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode { GeneratePixelShader(object, dstAlphaMode, ApiType, components); } + +void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +{ + GeneratePixelShader(object, dstAlphaMode, ApiType, 
components); +} + diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 1476cb634d..221b04612e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -72,7 +72,6 @@ struct pixel_shader_uid_data u32 numtevstages : 4; u32 numindstages : 3; } genMode; - u32 fogc_proj_fselfsel : 3; struct { u32 unknown : 1; @@ -177,11 +176,11 @@ struct pixel_shader_uid_data typedef ShaderUid PixelShaderUid; typedef ShaderCode PixelShaderCode; -//typedef ShaderConstantProfile PixelShaderConstantProfile; +typedef ShaderConstantProfile PixelShaderConstantProfile; void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); -//void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); +void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); #endif // GCOGL_PIXELSHADER_H diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index eaaf99fbc8..ffec7d64cb 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -83,39 +83,58 @@ void PixelShaderManager::Shutdown() } -void PixelShaderManager::SetConstants() +void PixelShaderManager::SetConstants(u32 components) { - for (int i = 0; i < 2; ++i) + PixelShaderConstantProfile constant_profile(C_PENVCONST_END); + /// TODO: dst alpha/api/components type parameter... 
+ GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components); + + static int saved_updates = 0; + static int necessary_updates = 0; + + +#define IncStuff() { \ + saved_updates++; \ + printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates); } + + for (int i = 0; i < 2; ++i) { - if (s_nColorsChanged[i]) + if (s_nColorsChanged[i]) { - int baseind = i ? C_KCOLORS : C_COLORS; - for (int j = 0; j < 4; ++j) + int baseind = i ? C_KCOLORS : C_COLORS; + for (int j = 0; j < 4; ++j) { - if (s_nColorsChanged[i] & (1 << j)) - SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]); - } - s_nColorsChanged[i] = 0; - } - } + if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j)) + { + SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]); + s_nColorsChanged[i] &= ~(1<>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); s_bAlphaChanged = false; - } + } else if (s_bAlphaChanged) IncStuff(); - if (s_bZTextureTypeChanged) + if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS)) { float ftemp[4]; switch (bpmem.ztex2.type) @@ -133,11 +152,12 @@ void PixelShaderManager::SetConstants() ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0; break; } + ++necessary_updates; SetPSConstant4fv(C_ZBIAS, ftemp); s_bZTextureTypeChanged = false; - } + } else if (s_bZTextureTypeChanged) IncStuff(); - if (s_bZBiasChanged || s_bDepthRangeChanged) + if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1)) { // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) // [0] = width/2 @@ -148,9 +168,10 @@ void PixelShaderManager::SetConstants() // [5] = 16777215 * farz //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); + ++necessary_updates; SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, 
xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f); s_bZBiasChanged = s_bDepthRangeChanged = false; - } + }else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff(); // indirect incoming texture scales if (s_nIndTexScaleChanged) @@ -158,7 +179,7 @@ void PixelShaderManager::SetConstants() // set as two sets of vec4s, each containing S and T of two ind stages. float f[8]; - if (s_nIndTexScaleChanged & 0x03) + if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE)) { for (u32 i = 0; i < 2; ++i) { @@ -166,26 +187,30 @@ void PixelShaderManager::SetConstants() f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); } + ++necessary_updates; SetPSConstant4fv(C_INDTEXSCALE, f); + s_nIndTexScaleChanged &= ~0x03; } + else if ((s_nIndTexScaleChanged & 0x03)) IncStuff(); - if (s_nIndTexScaleChanged & 0x0c) { + if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1)) { for (u32 i = 2; i < 4; ++i) { f[2 * i] = bpmem.texscale[1].getScaleS(i & 1); f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); } + ++necessary_updates; SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]); + s_nIndTexScaleChanged &= ~0x0c; } - - s_nIndTexScaleChanged = 0; + else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff(); } if (s_nIndTexMtxChanged) { for (int i = 0; i < 3; ++i) { - if (s_nIndTexMtxChanged & (1 << i)) + if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1))) { int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) | ((u32)bpmem.indmtx[i].col1.s1 << 2) | @@ -195,6 +220,8 @@ void PixelShaderManager::SetConstants() // xyz - static matrix // TODO w - dynamic matrix scale / 256...... somehow / 4 works better // rev 2972 - now using / 256.... 
verify that this works + ++necessary_updates; + ++necessary_updates; SetPSConstant4f(C_INDTEXMTX + 2 * i, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, @@ -210,19 +237,22 @@ void PixelShaderManager::SetConstants() i, 1024.0f*fscale, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale, bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale); - } + + s_nIndTexMtxChanged &= ~(1 << i); + }else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();} } - s_nIndTexMtxChanged = 0; } - if (s_bFogColorChanged) + if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG)) { + ++necessary_updates; SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0); s_bFogColorChanged = false; - } + }else if (s_bFogColorChanged) IncStuff(); - if (s_bFogParamChanged) + if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1)) { + ++necessary_updates; if(!g_ActiveConfig.bDisableFog) { //downscale magnitude to 0.24 bits @@ -235,10 +265,11 @@ void PixelShaderManager::SetConstants() SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0); s_bFogParamChanged = false; - } + }else if ( s_bFogParamChanged) IncStuff(); - if (s_bFogRangeAdjustChanged) + if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2)) { + ++necessary_updates; if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1) { //bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342; @@ -257,8 +288,9 @@ void PixelShaderManager::SetConstants() SetPSConstant4f(C_FOG + 2, 0.0f, 1.0f, 1.0f, 0.0f); // Need to update these values for older hardware that fails to divide by zero in shaders. s_bFogRangeAdjustChanged = false; - } + }else if ( s_bFogRangeAdjustChanged) IncStuff(); + // TODO: use constant profile here! 
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f { if (nLightsChanged[0] >= 0) @@ -353,7 +385,8 @@ void PixelShaderManager::SetPSTextureDims(int texid) SetPSConstant4fv(C_TEXDIMS + texid, fdims); } -// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value +// This one is high in profiles (0.5%). +// TODO: Move conversion out, only store the raw color value // and update it when the shader constant is set, only. void PixelShaderManager::SetColorChanged(int type, int num, bool high) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.h b/Source/Core/VideoCommon/Src/PixelShaderManager.h index 12d749c871..348940f495 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.h +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.h @@ -34,7 +34,7 @@ public: static void Shutdown(); static void DoState(PointerWrap &p); - static void SetConstants(); // sets pixel shader constants + static void SetConstants(u32 components); // sets pixel shader constants // constant management, should be called after memory is committed static void SetColorChanged(int type, int index, bool high); diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 39fcf13d95..e052066628 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -23,6 +23,8 @@ #include #include "CommonTypes.h" +#include + template class ShaderUid { @@ -36,6 +38,7 @@ public: void Write(const char* fmt, ...) 
{} const char* GetBuffer() { return NULL; } void SetBuffer(char* buffer) { } + inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} bool operator == (const ShaderUid& obj) const { @@ -55,7 +58,7 @@ public: return false; } - uid_data& GetUidData() { return data; } + inline uid_data& GetUidData() { return data; } private: union @@ -86,16 +89,46 @@ public: const char* GetBuffer() { return buf; } void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } uid_data& GetUidData() { return *(uid_data*)NULL; } + inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} private: const char* buf; char* write_ptr; }; +template +class ShaderConstantProfile +{ +public: + ShaderConstantProfile(int num_constants) { constant_usage.resize(num_constants); } + + void Write(const char* fmt, ...) {} + const char* GetBuffer() { return NULL; } + void SetBuffer(char* buffer) { } + uid_data& GetUidData() { return *(uid_data*)NULL; } + + // has room for optimization (if it matters at all...) + size_t NumConstants() { return constant_usage.size(); } + + inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) + { + for (unsigned int i = first_index; i < last_index+1; ++i) + constant_usage[i] = true; + } + + inline bool ConstantIsUsed(unsigned int index) + { + return constant_usage[index]; + } +private: + std::vector constant_usage; // TODO: Is vector appropriate here? 
+}; + enum GenOutput { GO_ShaderCode, GO_ShaderUid, + GO_ShaderConstantProfile, }; #endif // _SHADERGENCOMMON_H diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index ce33247d7c..d89f28ba5d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -180,7 +180,7 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; From 9f1582843d03dbe9ee607ddcc8323699d408d693 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 2 Sep 2012 19:14:43 +0200 Subject: [PATCH 09/54] PixelShaderManager: Reduce number of redundant shader constant updates --- .../VideoCommon/Src/PixelShaderManager.cpp | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index ffec7d64cb..e458c5f1e1 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -42,19 +42,45 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30 static u32 lastZBias; static int nMaterialsChanged; +static float s_constant_cache[C_PENVCONST_END*4]; + inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) { + if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 && + s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4) + return; + g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4); + s_constant_cache[const_number*4] = f1; + s_constant_cache[const_number*4+1] 
= f2; + s_constant_cache[const_number*4+2] = f3; + s_constant_cache[const_number*4+3] = f4; } inline void SetPSConstant4fv(unsigned int const_number, const float *f) { + if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] && + s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3]) + return; + g_renderer->SetPSConstant4fv(const_number, f); + s_constant_cache[const_number*4] = f[0]; + s_constant_cache[const_number*4+1] = f[1]; + s_constant_cache[const_number*4+2] = f[2]; + s_constant_cache[const_number*4+3] = f[3]; } inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) { + for (unsigned int i = 0; i < 4*count; ++i) + if (s_constant_cache[const_number*4+i] != f[i]) + break; + else if (i == 4*count-1) + return; + g_renderer->SetMultiPSConstant4fv(const_number, count, f); + for (unsigned int i = 0; i < 4*count; ++i) + s_constant_cache[const_number*4+i] = f[i]; } void PixelShaderManager::Init() @@ -63,6 +89,7 @@ void PixelShaderManager::Init() memset(lastTexDims, 0, sizeof(lastTexDims)); lastZBias = 0; memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); + memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side.... Dirty(); } @@ -95,7 +122,7 @@ void PixelShaderManager::SetConstants(u32 components) #define IncStuff() { \ saved_updates++; \ - printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates); } + /*printf("Saved a constant update at line %d! 
Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ } for (int i = 0; i < 2; ++i) { From 76148a52b82cae8e6cf11b6eeeeff55345b0f12e Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 28 Jan 2013 22:51:15 +0100 Subject: [PATCH 10/54] Fix a few other things --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 30 ++++++++++++------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 19 +++++++----- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index a19eb1a27e..fa38075118 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -273,16 +273,6 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u SetUidField(genMode.numtevstages, bpmem.genMode.numtevstages); SetUidField(genMode.numtexgens, bpmem.genMode.numtexgens); - int nIndirectStagesUsed = 0; - if (bpmem.genMode.numindstages > 0) - { - for (unsigned int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } - } - // Declare samplers out.Write((ApiType != API_D3D11) ? "uniform sampler2D " : "sampler "); for (int i = 0; i < 8; ++i) @@ -317,6 +307,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("uniform s_" I_PMATERIALS" " I_PMATERIALS" : register(c%d);\n", C_PMATERIALS); } + // TODO: Somehow should put ApiType in the hash.. out.Write("void main(\n"); if(ApiType != API_D3D11) { @@ -355,6 +346,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } else { + // TODO: Not necessary... SetUidField(xfregs_numTexGen_numTexGens, xfregs.numTexGen.numTexGens); for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 
4 : 3 , i, i); @@ -423,10 +415,20 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } // indirect texture map lookup + int nIndirectStagesUsed = 0; + if (bpmem.genMode.numindstages > 0) + { + for (unsigned int i = 0; i < numStages; ++i) + { + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; + } + } + SetUidField(nIndirectStagesUsed, nIndirectStagesUsed); for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { - if (nIndirectStagesUsed & (1<(out, ApiType, dstAlphaMode); @@ -514,8 +517,13 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); // Note: depth textures are disabled if early depth test is enabled + SetUidField(Pretest, Pretest); + SetUidField(ztex.op, bpmem.ztex2.op); + SetUidField(early_z, bpmem.zcontrol.early_ztest); + SetUidField(ztestenable, bpmem.zmode.testenable); if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) { + // TODO: Implement type?? // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... 
out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 221b04612e..c1c210dce6 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -20,6 +20,7 @@ #include "VideoCommon.h" #include "ShaderGenCommon.h" +#include "BPMemory.h" #define I_COLORS "color" #define I_KCOLORS "k" @@ -54,18 +55,11 @@ enum DSTALPHA_MODE DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending }; -enum ALPHA_PRETEST_RESULT -{ - ALPHAPT_UNDEFINED, // AlphaTest Result is not defined - ALPHAPT_ALWAYSFAIL, // Alpha test alway Fail - ALPHAPT_ALWAYSPASS // Alpha test alway Pass -}; - struct pixel_shader_uid_data { u32 components; DSTALPHA_MODE dstAlphaMode; // TODO: as u32 :2 - ALPHA_PRETEST_RESULT Pretest; // TODO: As :2 + AlphaTest::TEST_RESULT Pretest; // TODO: As :2 u32 nIndirectStagesUsed : 8; struct { u32 numtexgens : 4; @@ -168,6 +162,15 @@ struct pixel_shader_uid_data u32 hex : 4; } fog; + union { + struct { + u32 op : 2; + }; + u32 hex : 2; + } ztex; + + u32 early_z : 1; + u32 ztestenable : 1; u32 bHasIndStage : 16; From 30f1a4b4fe154bb6e58dda9d3d1c8bfb4bd822d7 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 26 Mar 2013 18:07:42 +0100 Subject: [PATCH 11/54] Partially revert "Now CG plays nice with this new stuff." This reverts commit 3943840d5c422af45941a96e6d4407d59380b39d. Support for old GLSL versions has been dropped, so to make things less ugly we can use a structure for lights again. 
--- .../VideoCommon/Src/LightingShaderGen.cpp | 26 +++++++++---------- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 3 ++- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 5 ++-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp index 2b717bdba6..2cb52004a9 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp @@ -47,13 +47,13 @@ char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char // atten disabled switch (chan.diffusefunc) { case LIGHTDIF_NONE: - WRITE(p, "lacc.%s += %s[%d].%s;\n", swizzle, lightsName, index * 5, swizzle); + WRITE(p, "lacc.%s += %s[%d].col.%s;\n", swizzle, lightsName, index, swizzle); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - WRITE(p, "ldir = normalize(%s[%d + 3].xyz - pos.xyz);\n", lightsName, index * 5); - WRITE(p, "lacc.%s += %sdot(ldir, _norm0)) * %s[%d].%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index * 5, swizzle); + WRITE(p, "ldir = normalize(%s[%d].pos.xyz - pos.xyz);\n", lightsName, index); + WRITE(p, "lacc.%s += %sdot(ldir, _norm0)) * %s[%d].col.%s;\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? 
"max(0.0f," :"(", lightsName, index, swizzle); break; default: _assert_(0); } @@ -62,32 +62,32 @@ char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char if (chan.attnfunc == 3) { // spot - WRITE(p, "ldir = %s[%d + 3].xyz - pos.xyz;\n", lightsName, index * 5); + WRITE(p, "ldir = %s[%d].pos.xyz - pos.xyz;\n", lightsName, index); WRITE(p, "dist2 = dot(ldir, ldir);\n" "dist = sqrt(dist2);\n" "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s[%d + 4].xyz));\n", lightsName, index * 5); - WRITE(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1.0f, attn, attn*attn))) / dot(%s[%d + 2].xyz, float3(1.0f,dist,dist2));\n", lightsName, index * 5, lightsName, index * 5); + "attn = max(0.0f, dot(ldir, %s[%d].dir.xyz));\n", lightsName, index); + WRITE(p, "attn = max(0.0f, dot(%s[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); } else if (chan.attnfunc == 1) { // specular - WRITE(p, "ldir = normalize(%s[%d + 3].xyz);\n", lightsName, index * 5); - WRITE(p, "attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s[%d + 4].xyz)) : 0.0f;\n", lightsName, index * 5); - WRITE(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1,attn,attn*attn))) / dot(%s[%d + 2].xyz, float3(1,attn,attn*attn));\n", lightsName, index * 5, lightsName, index * 5); + WRITE(p, "ldir = normalize(%s[%d].pos.xyz);\n", lightsName, index); + WRITE(p, "attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s[%d].dir.xyz)) : 0.0f;\n", lightsName, index); + WRITE(p, "attn = max(0.0f, dot(%s[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); } switch (chan.diffusefunc) { case LIGHTDIF_NONE: - WRITE(p, "lacc.%s += attn * %s[%d].%s;\n", swizzle, lightsName, index * 5, swizzle); + WRITE(p, "lacc.%s += attn * %s[%d].col.%s;\n", swizzle, lightsName, index, swizzle); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - WRITE(p, "lacc.%s += attn * %sdot(ldir, _norm0)) * %s[%d].%s;\n", + WRITE(p, "lacc.%s += attn * %sdot(ldir, _norm0)) * %s[%d].col.%s;\n", swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, - index * 5, + index, swizzle); break; default: _assert_(0); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index d863a6e0a9..d7e74a049d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -590,7 +590,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WRITE(p, "\t%sfloat4 " I_FOG"[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG)); // For pixel lighting - WRITE(p, "\t%sfloat4 " I_PLIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); + WRITE(p, "struct Light { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; };\n"); + WRITE(p, "\t%sLight " I_PLIGHTS"[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); WRITE(p, "\t%sfloat4 " I_PMATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS)); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 2c0f5676ed..2410548397 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ 
b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -192,7 +192,8 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) WRITE(p, "%sfloat4 " I_POSNORMALMATRIX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSNORMALMATRIX)); WRITE(p, "%sfloat4 " I_PROJECTION"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PROJECTION)); WRITE(p, "%sfloat4 " I_MATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_MATERIALS)); - WRITE(p, "%sfloat4 " I_LIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_LIGHTS)); + WRITE(p, "struct Light { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; };\n"); + WRITE(p, "%sLight " I_LIGHTS"[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_LIGHTS)); WRITE(p, "%sfloat4 " I_TEXMATRICES"[24] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXMATRICES)); // also using tex matrices WRITE(p, "%sfloat4 " I_TRANSFORMMATRICES"[64] %s;\n", WriteLocation(ApiType),WriteRegister(ApiType, "c", C_TRANSFORMMATRICES)); WRITE(p, "%sfloat4 " I_NORMALMATRICES"[32] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_NORMALMATRICES)); @@ -413,7 +414,7 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType) if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { // transform the light dir into tangent space - WRITE(p, "ldir = normalize(" I_LIGHTS"[%d + 3].xyz - pos.xyz);\n", texinfo.embosslightshift); + WRITE(p, "ldir = normalize(" I_LIGHTS"[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); } else From b75a617d8a2e4307ed4af06669e92fc33f6af1df Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 26 Mar 2013 19:35:00 +0100 Subject: [PATCH 12/54] VertexShaderGen: De-uglify VS output structure writing --- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 58 ++++++++++++------- 1 file 
changed, 37 insertions(+), 21 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 2410548397..f647bd0280 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -131,33 +131,49 @@ static char text[16384]; #define WRITE p+=sprintf +char* DefineVSOutputStructMember(char* p, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) +{ + WRITE(p, " %s %s", type, name); + if (var_index != -1) + WRITE(p, "%d", var_index); + + if (api_type == API_OPENGL) + WRITE(p, ";\n"); + else + { + if (semantic_index != -1) + WRITE(p, " : %s%d;\n", semantic, semantic_index); + else + WRITE(p, " : %s;\n", semantic); + } + + return p; +} + char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE ApiType) { - // GLSL makes this ugly - // TODO: Make pretty WRITE(p, "struct VS_OUTPUT {\n"); - WRITE(p, " float4 pos %s POSITION;\n", ApiType == API_OPENGL ? ";//" : ":"); - WRITE(p, " float4 colors_0 %s COLOR0;\n", ApiType == API_OPENGL ? ";//" : ":"); - WRITE(p, " float4 colors_1 %s COLOR1;\n", ApiType == API_OPENGL ? ";//" : ":"); + p = DefineVSOutputStructMember(p, ApiType, "float4", "pos", -1, "POSITION"); + p = DefineVSOutputStructMember(p, ApiType, "float4", "colors_", 0, "COLOR", 0); + p = DefineVSOutputStructMember(p, ApiType, "float4", "colors_", 1, "COLOR", 1); - if (xfregs.numTexGen.numTexGens < 7) { + if (xfregs.numTexGen.numTexGens < 7) + { for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " float3 tex%d %s TEXCOORD%d;\n", i, ApiType == API_OPENGL ? ";//" : ":", i); - WRITE(p, " float4 clipPos %s TEXCOORD%d;\n", ApiType == API_OPENGL ? 
";//" : ":", xfregs.numTexGen.numTexGens); + p = DefineVSOutputStructMember(p, ApiType, "float3", "tex", i, "TEXCOORD", i); + + p = DefineVSOutputStructMember(p, ApiType, "float4", "clipPos", -1, "TEXCOORD", xfregs.numTexGen.numTexGens); + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - WRITE(p, " float4 Normal %s TEXCOORD%d;\n", ApiType == API_OPENGL ? ";//" : ":", xfregs.numTexGen.numTexGens + 1); - } else { - // clip position is in w of first 4 texcoords - if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) - { - for (int i = 0; i < 8; ++i) - WRITE(p, " float4 tex%d %s TEXCOORD%d;\n", i, ApiType == API_OPENGL? ";//" : ":", i); - } - else - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - WRITE(p, " float%d tex%d %s TEXCOORD%d;\n", i < 4 ? 4 : 3 , i, ApiType == API_OPENGL ? ";//" : ":", i); - } + p = DefineVSOutputStructMember(p, ApiType, "float4", "Normal", -1, "TEXCOORD", xfregs.numTexGen.numTexGens + 1); + } + else + { + // Store clip position in the w component of first 4 texcoords + bool ppl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; + int num_texcoords = ppl ? 8 : xfregs.numTexGen.numTexGens; + for (int i = 0; i < num_texcoords; ++i) + p = DefineVSOutputStructMember(p, ApiType, (ppl || i < 4) ? "float4" : "float3", "tex", i, "TEXCOORD", i); } WRITE(p, "};\n"); From 0e319432164ef0c906e947a2c54fa51739d00e7c Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 26 Mar 2013 23:03:10 +0100 Subject: [PATCH 13/54] ShaderGenCommon: Introduce a common shader generator interface to make stuff less confusing. 
--- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index e052066628..d02462360d 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -26,7 +26,18 @@ #include template -class ShaderUid +class ShaderGeneratorInterface +{ +public: + void Write(const char* fmt, ...) {} + const char* GetBuffer() { return NULL; } + void SetBuffer(char* buffer) { } + inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} + uid_data& GetUidData() { return *(uid_data*)NULL; } +}; + +template +class ShaderUid : public ShaderGeneratorInterface { public: ShaderUid() @@ -35,11 +46,6 @@ public: memset(values, 0, sizeof(values)); } - void Write(const char* fmt, ...) {} - const char* GetBuffer() { return NULL; } - void SetBuffer(char* buffer) { } - inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} - bool operator == (const ShaderUid& obj) const { return memcmp(this->values, obj.values, sizeof(values)) == 0; @@ -70,7 +76,7 @@ private: // Needs to be a template for hacks... template -class ShaderCode +class ShaderCode : public ShaderGeneratorInterface { public: ShaderCode() : buf(NULL), write_ptr(NULL) @@ -88,8 +94,6 @@ public: const char* GetBuffer() { return buf; } void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; } - uid_data& GetUidData() { return *(uid_data*)NULL; } - inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} private: const char* buf; @@ -97,16 +101,11 @@ private: }; template -class ShaderConstantProfile +class ShaderConstantProfile : public ShaderGeneratorInterface { public: ShaderConstantProfile(int num_constants) { constant_usage.resize(num_constants); } - void Write(const char* fmt, ...) 
{} - const char* GetBuffer() { return NULL; } - void SetBuffer(char* buffer) { } - uid_data& GetUidData() { return *(uid_data*)NULL; } - // has room for optimization (if it matters at all...) void NumConstants() { return constant_usage.size(); } From 364a5093d9e36a43de44c0aec735d0ee11e7a3bb Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 26 Mar 2013 23:21:08 +0100 Subject: [PATCH 14/54] ShaderGenCommon: Replace the GenOutput enum by using typeid instead. --- .../Core/VideoCommon/Src/LightingShaderGen.h | 16 ++++---- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 41 ++++++++++--------- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 7 ---- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 15 +++---- 4 files changed, 38 insertions(+), 41 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index 09b0cdc28f..5945bc09ca 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -18,15 +18,17 @@ #ifndef _LIGHTINGSHADERGEN_H_ #define _LIGHTINGSHADERGEN_H_ +#include + #include "ShaderGenCommon.h" #include "NativeVertexFormat.h" #include "XFMemory.h" // T.uid_data needs to have a struct named lighting_uid -template +template void GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) { -#define SetUidField(name, value) if (type == GO_ShaderUid) { object.GetUidData().name = value; }; +#define SetUidField(name, value) if (typeid(T) == typeid(UidType)) { object.GetUidData().name = value; }; const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; const char* swizzle = "xyzw"; if (coloralpha == 1 ) swizzle = "xyz"; @@ -92,7 +94,7 @@ void GenerateLightShader(T& object, int index, int litchan_index, const char* li // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps -template +template void GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) @@ -186,7 +188,7 @@ void GenerateLightingShader(T& object, int components, const char* materialsName { if (mask & (1<(object, i, j, lightsName, 3); + GenerateLightShader(object, i, j, lightsName, 3); } } } @@ -196,9 +198,9 @@ void GenerateLightingShader(T& object, int components, const char* materialsName for (int i = 0; i < 8; ++i) { if (!(mask&(1<(object, i, j, lightsName, 1); + GenerateLightShader(object, i, j, lightsName, 1); if (!(mask&(1<(object, i, j+2, lightsName, 2); + GenerateLightShader(object, i, j+2, lightsName, 2); } } else if (color.enablelighting || alpha.enablelighting) @@ -212,7 +214,7 @@ void GenerateLightingShader(T& object, int components, const char* materialsName for (int i = 0; i < 8; ++i) { if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); + GenerateLightShader(object, i, lit_index, lightsName, coloralpha); } } object.Write("%s%d = mat * saturate(lacc);\n", dest, j); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 44023e2000..f22ef94c6d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "LightingShaderGen.h" #include "PixelShaderGen.h" @@ -37,10 +38,10 @@ // output is given by .outreg // tevtemp is set 
according to swapmodetables and -template static void WriteStage(char *&p, int n, API_TYPE ApiType); -template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -template static void WriteAlphaTest(T& out, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template static void WriteFog(T& out); +template static void WriteStage(char *&p, int n, API_TYPE ApiType); +template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +template static void WriteAlphaTest(T& out, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +template static void WriteFog(T& out); static const char *tevKSelTableC[] = // KCSEL { @@ -266,13 +267,13 @@ const char *WriteLocation(API_TYPE ApiType) return result; } -template +template void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { // TODO: Can be optimized if using alpha pass -#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; -#define OR_UidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name |= value; }; - if (type == GO_ShaderCode) +#define SetUidField(name, value) if (typeid(T) == typeid(PixelShaderUid)) {out.GetUidData().name = value; }; +#define OR_UidField(name, value) if (typeid(T) == typeid(PixelShaderUid)) {out.GetUidData().name |= value; }; + if (typeid(T) == typeid(PixelShaderCode)) { setlocale(LC_NUMERIC, "C"); // Reset locale for compilation out.SetBuffer(text); @@ -558,7 +559,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u char buffer[32]; sprintf(buffer, "float3 indtex%d", i); - SampleTexture(out, buffer, "tempcoord", "abg", texmap, ApiType); + SampleTexture(out, buffer, "tempcoord", "abg", texmap, ApiType); } } @@ -575,7 +576,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE 
dstAlphaMode, API_TYPE ApiType, u // Uid fields for BuildSwapModeTable are set in WriteStage BuildSwapModeTable(); for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, i, ApiType); // build the equation for this stage + WriteStage(out, i, ApiType); // build the equation for this stage if (numStages) { @@ -604,7 +605,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); SetUidField(Pretest, Pretest); if (Pretest == AlphaTest::UNDETERMINED) - WriteAlphaTest(out, ApiType, dstAlphaMode, per_pixel_depth); + WriteAlphaTest(out, ApiType, dstAlphaMode, per_pixel_depth); // the screen space depth value = far z + (clip z / clip w) * z range @@ -648,7 +649,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } else { - WriteFog(out); + WriteFog(out); out.Write("\tocol0 = prev;\n"); } @@ -716,7 +717,7 @@ static const char *TEVCMPAlphaOPTable[16] = }; -template +template static void WriteStage(T& out, int n, API_TYPE ApiType) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); @@ -865,7 +866,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); - SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); + SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); } else out.Write("textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); @@ -1090,7 +1091,7 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) out.Write("// TEV done\n"); } -template +template void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); @@ -1120,7 +1121,7 @@ static const char *tevAlphaFunclogicTable[] = " == " // xnor }; -template +template static void WriteAlphaTest(T& out, API_TYPE ApiType, 
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char *alphaRef[2] = @@ -1188,7 +1189,7 @@ static const char *tevFogFuncsTable[] = "\tfog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" //backward exp2 }; -template +template static void WriteFog(T& out) { SetUidField(fog.fsel, bpmem.fog.c_proj_fsel.fsel); @@ -1240,16 +1241,16 @@ static void WriteFog(T& out) void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - GeneratePixelShader(object, dstAlphaMode, ApiType, components); + GeneratePixelShader(object, dstAlphaMode, ApiType, components); } void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - GeneratePixelShader(object, dstAlphaMode, ApiType, components); + GeneratePixelShader(object, dstAlphaMode, ApiType, components); } void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - GeneratePixelShader(object, dstAlphaMode, ApiType, components); + GeneratePixelShader(object, dstAlphaMode, ApiType, components); } diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index d02462360d..8606a4e5b9 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -123,11 +123,4 @@ private: std::vector constant_usage; // TODO: Is vector appropriate here? 
}; -enum GenOutput -{ - GO_ShaderCode, - GO_ShaderUid, - GO_ShaderConstantProfile, -}; - #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 2ea5ef5d7f..3e2daf55d3 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -17,6 +17,7 @@ #include #include +#include #include "NativeVertexFormat.h" @@ -80,13 +81,13 @@ void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) extern const char *WriteRegister(API_TYPE api_type, const char *prefix, const u32 num); extern const char *WriteLocation(API_TYPE api_type); -template +template void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { #undef SetUidField -#define SetUidField(name, value) if (type == GO_ShaderUid) {out.GetUidData().name = value; }; +#define SetUidField(name, value) if (typeid(T) == typeid(VertexShaderUid)) {out.GetUidData().name = value; }; - if (type == GO_ShaderCode) + if (typeid(T) == typeid(VertexShaderCode)) { out.SetBuffer(text); setlocale(LC_NUMERIC, "C"); // Reset locale for compilation @@ -275,7 +276,7 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) } // TODO: This probably isn't necessary if pixel lighting is enabled. 
- GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); if (xfregs.numChan.numColorChans < 2) { @@ -522,16 +523,16 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) /// if (text[sizeof(text) - 1] != 0x7C) /// PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); - if (type == GO_ShaderCode) + if (typeid(T) == typeid(VertexShaderCode)) setlocale(LC_NUMERIC, ""); // restore locale } void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type) { - GenerateVertexShader(object, components, api_type); + GenerateVertexShader(object, components, api_type); } void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) { - GenerateVertexShader(object, components, api_type); + GenerateVertexShader(object, components, api_type); } From 24ab51f9f618cea9260f5b432317f5ac95a538d6 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 26 Mar 2013 23:35:14 +0100 Subject: [PATCH 15/54] Fix Windows build, try 1. 
--- .../Plugin_VideoDX11/Src/PixelShaderCache.cpp | 23 +++++++------- .../Plugin_VideoDX11/Src/PixelShaderCache.h | 7 ++--- .../Src/VertexShaderCache.cpp | 24 +++++++-------- .../Plugin_VideoDX11/Src/VertexShaderCache.h | 7 ++--- .../Plugin_VideoDX9/Src/PixelShaderCache.cpp | 30 +++++++++---------- .../Plugin_VideoDX9/Src/PixelShaderCache.h | 7 ++--- .../Plugin_VideoDX9/Src/VertexShaderCache.cpp | 25 ++++++++-------- .../Plugin_VideoDX9/Src/VertexShaderCache.h | 7 ++--- 8 files changed, 60 insertions(+), 70 deletions(-) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp index 56f3719057..f818c5df87 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp @@ -41,9 +41,9 @@ namespace DX11 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry; -PIXELSHADERUID PixelShaderCache::last_uid; +PixelShaderUid PixelShaderCache::last_uid; -LinearDiskCache g_ps_disk_cache; +LinearDiskCache g_ps_disk_cache; ID3D11PixelShader* s_ColorMatrixProgram[2] = {NULL}; ID3D11PixelShader* s_ColorCopyProgram[2] = {NULL}; @@ -363,10 +363,10 @@ ID3D11Buffer* &PixelShaderCache::GetConstantBuffer() } // this class will load the precompiled shaders into our cache -class PixelShaderCacheInserter : public LinearDiskCacheReader +class PixelShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size) + void Read(const PixelShaderUid &key, const u8 *value, u32 value_size) { PixelShaderCache::InsertByteCode(key, value, value_size); } @@ -461,8 +461,8 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { - PIXELSHADERUID uid; - GetPixelShaderId(&uid, dstAlphaMode, components); + PixelShaderUid uid; + GetPixelShaderUid(uid, dstAlphaMode, components); 
// Check if the shader is already set if (last_entry) @@ -470,7 +470,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); - ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components); return (last_entry->shader != NULL); } } @@ -486,15 +485,15 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); - ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components); return (entry.shader != NULL); } // Need to compile a new shader - const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components); + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components); D3DBlob* pbytecode; - if (!D3D::CompilePixelShader(code, (unsigned int)strlen(code), &pbytecode)) + if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned int)strlen(code.GetBuffer()), &pbytecode)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; @@ -508,7 +507,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { - PixelShaders[uid].code = code; + PixelShaders[uid].code = code.GetBuffer(); GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components); } @@ -516,7 +515,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) return success; } -bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen) +bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen) { ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); if (shader == NULL) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h 
b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h index 874a47e4c1..f08fa43142 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h @@ -35,7 +35,7 @@ public: static void Clear(); static void Shutdown(); static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader - static bool InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen); + static bool InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen); static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; } static ID3D11Buffer* &GetConstantBuffer(); @@ -54,18 +54,17 @@ private: { ID3D11PixelShader* shader; - PIXELSHADERUIDSAFE safe_uid; std::string code; PSCacheEntry() : shader(NULL) {} void Destroy() { SAFE_RELEASE(shader); } }; - typedef std::map PSCache; + typedef std::map PSCache; static PSCache PixelShaders; static const PSCacheEntry* last_entry; - static PIXELSHADERUID last_uid; + static PixelShaderUid last_uid; }; } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp index 6fdc815554..4ba720528e 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp @@ -37,14 +37,14 @@ namespace DX11 { VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; -VERTEXSHADERUID VertexShaderCache::last_uid; +VertexShaderUid VertexShaderCache::last_uid; static ID3D11VertexShader* SimpleVertexShader = NULL; static ID3D11VertexShader* ClearVertexShader = NULL; static ID3D11InputLayout* SimpleLayout = NULL; static ID3D11InputLayout* ClearLayout = NULL; -LinearDiskCache g_vs_disk_cache; +LinearDiskCache g_vs_disk_cache; ID3D11VertexShader* 
VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader; } ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader; } @@ -68,10 +68,10 @@ ID3D11Buffer* &VertexShaderCache::GetConstantBuffer() } // this class will load the precompiled shaders into our cache -class VertexShaderCacheInserter : public LinearDiskCacheReader +class VertexShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size) + void Read(const VertexShaderUid &key, const u8 *value, u32 value_size) { D3DBlob* blob = new D3DBlob(value_size, value); VertexShaderCache::InsertByteCode(key, blob); @@ -208,14 +208,13 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader(u32 components) { - VERTEXSHADERUID uid; - GetVertexShaderId(&uid, components); + VertexShaderUid uid; + GetVertexShaderUid(uid, components, API_D3D11); if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components); return (last_entry->shader != NULL); } } @@ -229,14 +228,14 @@ bool VertexShaderCache::SetShader(u32 components) last_entry = &entry; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components); return (entry.shader != NULL); } - const char *code = GenerateVertexShaderCode(components, API_D3D11); + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D11); D3DBlob* pbytecode = NULL; - D3D::CompileVertexShader(code, (int)strlen(code), &pbytecode); + D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &pbytecode); if (pbytecode == NULL) { @@ -250,15 +249,14 @@ bool VertexShaderCache::SetShader(u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { - vshaders[uid].code = code; - GetSafeVertexShaderId(&vshaders[uid].safe_uid, components); + 
vshaders[uid].code = code.GetBuffer(); } GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return success; } -bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob) +bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob) { ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob); if (shader == NULL) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h index 6d9537606a..0c1dba790f 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h @@ -44,7 +44,7 @@ public: static ID3D11InputLayout* GetSimpleInputLayout(); static ID3D11InputLayout* GetClearInputLayout(); - static bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob); + static bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob); private: struct VSCacheEntry @@ -52,7 +52,6 @@ private: ID3D11VertexShader* shader; D3DBlob* bytecode; // needed to initialize the input layout - VERTEXSHADERUIDSAFE safe_uid; std::string code; VSCacheEntry() : shader(NULL), bytecode(NULL) {} @@ -68,11 +67,11 @@ private: SAFE_RELEASE(bytecode); } }; - typedef std::map VSCache; + typedef std::map VSCache; static VSCache vshaders; static const VSCacheEntry* last_entry; - static VERTEXSHADERUID last_uid; + static VertexShaderUid last_uid; }; } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index f66d096ab6..76beaa2c9f 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -43,9 +43,9 @@ namespace DX9 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry; -PIXELSHADERUID PixelShaderCache::last_uid; 
+PixelShaderUid PixelShaderCache::last_uid; -static LinearDiskCache g_ps_disk_cache; +static LinearDiskCache g_ps_disk_cache; static std::set unique_shaders; #define MAX_SSAA_SHADERS 3 @@ -67,10 +67,10 @@ static LPDIRECT3DPIXELSHADER9 s_ClearProgram = NULL; static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL; static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL; -class PixelShaderCacheInserter : public LinearDiskCacheReader +class PixelShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size) + void Read(const PixelShaderUid &key, const u8 *value, u32 value_size) { PixelShaderCache::InsertByteCode(key, value, value_size, false); } @@ -333,8 +333,8 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30; - PIXELSHADERUID uid; - GetPixelShaderId(&uid, dstAlphaMode, components); + PixelShaderUid uid; + GetPixelShaderUid(uid, dstAlphaMode, components); // Check if the shader is already set if (last_entry) @@ -342,7 +342,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components); return last_entry->shader != NULL; } } @@ -359,34 +358,34 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (entry.shader) D3D::SetPixelShader(entry.shader); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components); return (entry.shader != NULL); } // Need to compile a new shader - const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components); + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, api, components); if 
(g_ActiveConfig.bEnableShaderDebugging) { - u32 code_hash = HashAdler32((const u8 *)code, strlen(code)); + u32 code_hash = HashAdler32((const u8 *)code.GetBuffer(), strlen(code.GetBuffer())); unique_shaders.insert(code_hash); SETSTAT(stats.numUniquePixelShaders, unique_shaders.size()); } #if defined(_DEBUG) || defined(DEBUGFAST) - if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { + if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { static int counter = 0; char szTemp[MAX_PATH]; sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(szTemp, code); + SaveData(szTemp, code.GetBuffer()); } #endif u8 *bytecode = 0; int bytecodelen = 0; - if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) { + if (!D3D::CompilePixelShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; } @@ -400,15 +399,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { - PixelShaders[uid].code = code; - GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components); + PixelShaders[uid].code = code.GetBuffer(); } GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); return success; } -bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) +bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) { LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index c771984d36..96d2c29cd9 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -41,7 +41,6 @@ private: LPDIRECT3DPIXELSHADER9 shader; bool owns_shader; - 
PIXELSHADERUIDSAFE safe_uid; std::string code; PSCacheEntry() : shader(NULL), owns_shader(true) {} @@ -53,18 +52,18 @@ private: } }; - typedef std::map PSCache; + typedef std::map PSCache; static PSCache PixelShaders; static const PSCacheEntry *last_entry; - static PIXELSHADERUID last_uid; + static PixelShaderUid last_uid; static void Clear(); public: static void Init(); static void Shutdown(); static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 componets); - static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); + static bool InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate); static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, bool depthConversion); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index e392cf3de0..93685335ae 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -38,14 +38,14 @@ namespace DX9 VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; -VERTEXSHADERUID VertexShaderCache::last_uid; +VertexShaderUid VertexShaderCache::last_uid; #define MAX_SSAA_SHADERS 3 static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS]; static LPDIRECT3DVERTEXSHADER9 ClearVertexShader; -LinearDiskCache g_vs_disk_cache; +LinearDiskCache g_vs_disk_cache; LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level) { @@ -58,10 +58,10 @@ LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader() } // this class will load the precompiled shaders into our cache -class VertexShaderCacheInserter : public LinearDiskCacheReader +class VertexShaderCacheInserter : public 
LinearDiskCacheReader { public: - void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size) + void Read(const VertexShaderUid &key, const u8 *value, u32 value_size) { VertexShaderCache::InsertByteCode(key, value, value_size, false); } @@ -188,14 +188,13 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader(u32 components) { - VERTEXSHADERUID uid; - GetVertexShaderId(&uid, components); + VertexShaderUid uid; + GetVertexShaderUid(uid, components); if (last_entry) { if (uid == last_uid) { GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components); return (last_entry->shader != NULL); } } @@ -210,14 +209,15 @@ bool VertexShaderCache::SetShader(u32 components) if (entry.shader) D3D::SetVertexShader(entry.shader); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); - ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components); return (entry.shader != NULL); } - const char *code = GenerateVertexShaderCode(components, API_D3D9); + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D9); + u8 *bytecode; int bytecodelen; - if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen)) + if (!D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return false; @@ -227,15 +227,14 @@ bool VertexShaderCache::SetShader(u32 components) bool success = InsertByteCode(uid, bytecode, bytecodelen, true); if (g_ActiveConfig.bEnableShaderDebugging && success) { - vshaders[uid].code = code; - GetSafeVertexShaderId(&vshaders[uid].safe_uid, components); + vshaders[uid].code = code.GetBuffer(); } delete [] bytecode; GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); return success; } -bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) { +bool 
VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) { LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen); // Make an entry in the table diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h index c9c447e35b..9b6ff0d863 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h @@ -36,7 +36,6 @@ private: LPDIRECT3DVERTEXSHADER9 shader; std::string code; - VERTEXSHADERUIDSAFE safe_uid; VSCacheEntry() : shader(NULL) {} void Destroy() @@ -47,11 +46,11 @@ private: } }; - typedef std::map VSCache; + typedef std::map VSCache; static VSCache vshaders; static const VSCacheEntry *last_entry; - static VERTEXSHADERUID last_uid; + static VertexShaderUid last_uid; static void Clear(); public: @@ -60,7 +59,7 @@ public: static bool SetShader(u32 components); static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(int level); static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader(); - static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); + static bool InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate); static std::string GetCurrentShaderCode(); }; From 98362e5934d7c3dadbc8a4f5adc6dd523e535447 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 26 Mar 2013 23:44:41 +0100 Subject: [PATCH 16/54] Fix Windows build, try 2. 
--- Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp | 3 +-- Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp | 2 +- Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp | 2 +- Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp | 2 +- Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp index f818c5df87..7237603eab 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp @@ -462,7 +462,7 @@ void PixelShaderCache::Shutdown() bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { PixelShaderUid uid; - GetPixelShaderUid(uid, dstAlphaMode, components); + GetPixelShaderUid(uid, dstAlphaMode, API_D3D11, components); // Check if the shader is already set if (last_entry) @@ -508,7 +508,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) if (g_ActiveConfig.bEnableShaderDebugging && success) { PixelShaders[uid].code = code.GetBuffer(); - GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components); } GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index da8c98e19e..36e2ed2a68 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -248,7 +248,7 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; diff --git 
a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index 76beaa2c9f..7061ea8657 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -334,7 +334,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30; PixelShaderUid uid; - GetPixelShaderUid(uid, dstAlphaMode, components); + GetPixelShaderUid(uid, dstAlphaMode, API_D3D9, components); // Check if the shader is already set if (last_entry) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 543a1b9ddc..d2fcd6164a 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -359,7 +359,7 @@ void VertexManager::vFlush() // set global constants VertexShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components); u32 stride = g_nativeVertexFmt->GetVertexStride(); if (!PixelShaderCache::SetShader(DSTALPHA_NONE,g_nativeVertexFmt->m_components)) { diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 93685335ae..05e034cedf 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -189,7 +189,7 @@ void VertexShaderCache::Shutdown() bool VertexShaderCache::SetShader(u32 components) { VertexShaderUid uid; - GetVertexShaderUid(uid, components); + GetVertexShaderUid(uid, components, API_D3D9); if (last_entry) { if (uid == last_uid) From f8d2936840482b9132a751466c3c056c5988fa9a Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 27 Mar 2013 00:13:23 +0100 Subject: [PATCH 17/54] Fix 
Windows build, try 3. --- .../Plugin_VideoDX11/Src/LineGeometryShader.cpp | 13 +++++++------ .../Plugin_VideoDX11/Src/PointGeometryShader.cpp | 11 ++++++----- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp index 3afd38a2e1..74c58e8d17 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp @@ -182,11 +182,12 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth, if (shaderIt == m_shaders.end()) { // Generate new shader. Warning: not thread-safe. - static char code[16384]; - char* p = code; - p = GenerateVSOutputStruct(p, components, API_D3D11); - p += sprintf(p, "\n%s", LINE_GS_COMMON); - + static char buffer[16384]; + ShaderCode code; + code.SetBuffer(buffer); + GenerateVSOutputStruct(code, components, API_D3D11); + code.Write("\n%s", LINE_GS_COMMON); + std::stringstream numTexCoordsStream; numTexCoordsStream << xfregs.numTexGen.numTexGens; @@ -198,7 +199,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth, { "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { NULL, NULL } }; - ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros); + ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros); if (!newShader) { WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp index ad23786610..e6cea19ce9 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp @@ -176,10 +176,11 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize, 
if (shaderIt == m_shaders.end()) { // Generate new shader. Warning: not thread-safe. - static char code[16384]; - char* p = code; - p = GenerateVSOutputStruct(p, components, API_D3D11); - p += sprintf(p, "\n%s", POINT_GS_COMMON); + static char buffer[16384]; + ShaderCode code; + code.SetBuffer(buffer); + GenerateVSOutputStruct(code, components, API_D3D11); + code.Write("\n%s", POINT_GS_COMMON); std::stringstream numTexCoordsStream; numTexCoordsStream << xfregs.numTexGen.numTexGens; @@ -192,7 +193,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize, { "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { NULL, NULL } }; - ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros); + ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros); if (!newShader) { WARN_LOG(VIDEO, "Point geometry shader for components 0x%.08X failed to compile", components); From 11fae2e1cbce03333bdceb613eb81953adae930b Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 27 Mar 2013 00:17:46 +0100 Subject: [PATCH 18/54] Fix Windows build, try 4. 
--- Source/Core/VideoCommon/Src/VertexShaderGen.cpp | 5 +++++ Source/Core/VideoCommon/Src/VertexShaderGen.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 3e2daf55d3..88fe24f627 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -536,3 +536,8 @@ void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE { GenerateVertexShader(object, components, api_type); } + +void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type) +{ + GenerateVSOutputStruct >(object, components, api_type); +} diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 3167229fe3..b6b176c806 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -114,6 +114,6 @@ typedef ShaderCode VertexShaderCode; void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); - +void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type); #endif // GCOGL_VERTEXSHADER_H From 45c70be83f2263c22377ac414809d85bb21e25cd Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 27 Mar 2013 00:20:25 +0100 Subject: [PATCH 19/54] Fix Windows build, try 5. 
--- Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp | 2 +- Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp index 74c58e8d17..f22ebef0df 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp @@ -185,7 +185,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth, static char buffer[16384]; ShaderCode code; code.SetBuffer(buffer); - GenerateVSOutputStruct(code, components, API_D3D11); + GenerateVSOutputStructForGS(code, components, API_D3D11); code.Write("\n%s", LINE_GS_COMMON); std::stringstream numTexCoordsStream; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp index e6cea19ce9..5976c09ab9 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp @@ -179,7 +179,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize, static char buffer[16384]; ShaderCode code; code.SetBuffer(buffer); - GenerateVSOutputStruct(code, components, API_D3D11); + GenerateVSOutputStructForGS(code, components, API_D3D11); code.Write("\n%s", POINT_GS_COMMON); std::stringstream numTexCoordsStream; From a171525df6f0b0b43ebfc5330d9f21cfd6af298e Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 27 Mar 2013 01:33:27 +0100 Subject: [PATCH 20/54] Fix Windows crash. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index f22ef94c6d..d20e6ca755 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -314,7 +314,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u { // Declare samplers for (int i = 0; i < 8; ++i) - out.Write("%s samp%d %s;\n", (ApiType == API_D3D11) ? "sampler" : "uniform sampler2D", (i==0)?"":",", i, WriteRegister(ApiType, "s", i)); + out.Write("%s samp%d %s;\n", (ApiType == API_D3D11) ? "sampler" : "uniform sampler2D", i, WriteRegister(ApiType, "s", i)); if (ApiType == API_D3D11) { From f2a8fbb314aabb986e9f5711278272649a78b098 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 14:51:54 +0100 Subject: [PATCH 21/54] PixelShaderGen: Slightly reduce the number of redundant shader compilations. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 12 +++++++----- Source/Core/VideoCommon/Src/PixelShaderGen.h | 5 ++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index d20e6ca755..6ee5a72994 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -289,7 +289,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", numStages, numTexgen/*, bpmem.genMode.numindstages*/); - SetUidField(components, components); +// SetUidField(components, components); // TODO: Enable once per pixel lighting is implemented again SetUidField(dstAlphaMode, dstAlphaMode); SetUidField(genMode.numindstages, bpmem.genMode.numindstages); @@ -620,8 +620,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u // Note: depth textures are disabled if early depth test is enabled SetUidField(ztex.op, bpmem.ztex2.op); - SetUidField(early_z, bpmem.zcontrol.early_ztest); // TODO: Should be per_pixel_depth instead... - SetUidField(ztestenable, bpmem.zmode.testenable); // TODO: Should be fog instead... + SetUidField(per_pixel_depth, per_pixel_depth); + SetUidField(fog.fsel, bpmem.fog.c_proj_fsel.fsel); // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; @@ -632,12 +632,13 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? 
"+ zCoord" : ""); - // scale to make result from frac correct + // U24 overflow emulation out.Write("zCoord = zCoord * (16777215.0f/16777216.0f);\n"); out.Write("zCoord = frac(zCoord);\n"); out.Write("zCoord = zCoord * (16777216.0f/16777215.0f);\n"); + // Note: depth texture output is only written to depth buffer if late depth test is used - // TODO: Should this be outside the ztex if-block? + // final depth value is used for fog calculation, though if (per_pixel_depth) out.Write("depth = zCoord;\n"); } @@ -1167,6 +1168,7 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, // when the alpha test fail. This is not a correct implementation because // even if the depth test fails the fragment could be alpha blended, but // we don't have a choice. + SetUidField(alpha_test.use_zcomploc_hack, bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable); if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) { out.Write("\t\tdiscard;\n"); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 4dc3e6c1e0..4f9cba0c37 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -163,6 +163,7 @@ struct pixel_shader_uid_data u32 comp1 : 3; u32 logic : 2; // TODO: ref??? + u32 use_zcomploc_hack : 1; } alpha_test; union { @@ -182,9 +183,7 @@ struct pixel_shader_uid_data u32 hex : 2; } ztex; - u32 early_z : 1; - u32 ztestenable : 1; - + u32 per_pixel_depth : 1; u32 bHasIndStage : 16; u32 xfregs_numTexGen_numTexGens : 4; From 41c4108ce66d73b81c254165f18a23f72aabf09a Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 14:54:44 +0100 Subject: [PATCH 22/54] OpenGL: Reimplement shader uid debugging. 
--- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 5 ++ .../Src/ProgramShaderCache.cpp | 64 ++++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 8606a4e5b9..d41a9c6c46 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -51,6 +51,11 @@ public: return memcmp(this->values, obj.values, sizeof(values)) == 0; } + bool operator != (const ShaderUid& obj) const + { + return memcmp(this->values, obj.values, sizeof(values)) != 0; + } + // TODO: Store last frame used and order by that? makes much more sense anyway... bool operator < (const ShaderUid& obj) const { diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 53b0210919..6ce4e1e04c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -23,6 +23,9 @@ #include "ImageWrite.h" #include "Render.h" +#include +#include + namespace OGL { @@ -353,14 +356,71 @@ GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code ) return result; } +template UidT GetPartialUid(const SHADERUID& uid); +template<> PixelShaderUid GetPartialUid(const SHADERUID& uid) { return uid.puid; } +template<> VertexShaderUid GetPartialUid(const SHADERUID& uid) { return uid.vuid; } + +template const std::string& GetShaderCode(const SHADER& shader); +template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strpprog; } +template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strvprog; } + +template +void CheckForUidMismatch(const ProgramShaderCache::PCache& cache, CodeT& new_code, const UidT& new_uid) +{ + static std::map s_shaders; + static std::vector s_uids; + + bool uid_is_indexed = std::find(s_uids.begin(), s_uids.end(), new_uid) 
!= s_uids.end(); + if (!uid_is_indexed) + { + s_uids.push_back(new_uid); + s_shaders[new_uid] = new_code.GetBuffer(); + } + else + { + // uid is already in the index => check if there's a shader with the same uid but different code + auto& old_code = s_shaders[new_uid]; + if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0) + { + static int num_failures = 0; + + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), + std::is_same::value ? "p" : std::is_same::value ? "v" : "o", + ++num_failures); + + // TODO: Should also dump uids + std::ofstream file; + OpenFStream(file, szTemp, std::ios_base::out); + file << "Old shader code:\n" << old_code; + file << "\n\nNew shader code:\n" << new_code.GetBuffer(); + file.close(); + + // TODO: Make this more idiot-proof + ERROR_LOG(VIDEO, "%s shader uid mismatch!", + std::is_same::value ? "Pixel" : std::is_same::value ? "Vertex" : "Other"); + } + } +} -void ProgramShaderCache::GetShaderId ( SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components ) +void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components) { GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components); GetVertexShaderUid(uid->vuid, components, API_OPENGL); -} + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode pcode; + VertexShaderCode vcode; + + GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components); + GenerateVertexShaderCode(vcode, components, API_OPENGL); + + CheckForUidMismatch(pshaders, pcode, uid->puid); + CheckForUidMismatch(pshaders, vcode, uid->vuid); + } +} ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void) From 4e9c3db545b2ff9dc4469f3440dc6caffd3f1832 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 15:02:48 +0100 Subject: [PATCH 23/54] OSX build fix. 
--- Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 6ce4e1e04c..79bcc1abbd 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -24,7 +24,7 @@ #include "Render.h" #include -#include +#include namespace OGL { @@ -386,7 +386,7 @@ void CheckForUidMismatch(const ProgramShaderCache::PCache& cache, CodeT& new_cod char szTemp[MAX_PATH]; sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), - std::is_same::value ? "p" : std::is_same::value ? "v" : "o", + (typeid(UidT) == typeid(PixelShaderUid)) ? "p" : (typeid(UidT) == typeid(VertexShaderUid)) ? "v" : "o", ++num_failures); // TODO: Should also dump uids @@ -398,7 +398,7 @@ void CheckForUidMismatch(const ProgramShaderCache::PCache& cache, CodeT& new_cod // TODO: Make this more idiot-proof ERROR_LOG(VIDEO, "%s shader uid mismatch!", - std::is_same::value ? "Pixel" : std::is_same::value ? "Vertex" : "Other"); + (typeid(UidT) == typeid(PixelShaderUid)) ? "Pixel" : (typeid(UidT) == typeid(VertexShaderUid)) ? "Vertex" : "Other"); } } } From b2517c0308e992c7e3c13e0fe17140e422ae770d Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 15:08:00 +0100 Subject: [PATCH 24/54] More build fixes. 
--- Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h index 615daa685d..b7b757172a 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h @@ -90,6 +90,8 @@ public: } }; + typedef std::map PCache; + static PCacheEntry GetShaderProgram(void); static GLuint GetCurrentProgram(void); static SHADER* SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); @@ -113,8 +115,6 @@ private: void Read(const SHADERUID &key, const u8 *value, u32 value_size); }; - typedef std::map PCache; - static PCache pshaders; static PCacheEntry* last_entry; static SHADERUID last_uid; From 3c02f227db1e02f233f3aac8dc42476bd0b17f7a Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 20:33:28 +0100 Subject: [PATCH 25/54] PixelShaderManager: Disable constant cache (won't work in the non-UBO path of the opengl backend). ShaderGen: Replace typeid usage with more general code. 
--- .../Core/VideoCommon/Src/LightingShaderGen.h | 2 +- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 15 ++++++------ .../VideoCommon/Src/PixelShaderManager.cpp | 24 +++++++++---------- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 5 ++-- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 12 ++++------ .../Src/ProgramShaderCache.cpp | 12 +++++----- 6 files changed, 33 insertions(+), 37 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index 5945bc09ca..a58809942a 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -28,7 +28,7 @@ template void GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) { -#define SetUidField(name, value) if (typeid(T) == typeid(UidType)) { object.GetUidData().name = value; }; +#define SetUidField(name, value) if (&object.GetUidData() != NULL) { object.GetUidData().name = value; }; const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; const char* swizzle = "xyzw"; if (coloralpha == 1 ) swizzle = "xyz"; diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 6ee5a72994..49b77274a8 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include "LightingShaderGen.h" #include "PixelShaderGen.h" @@ -271,13 +270,12 @@ template void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { // TODO: Can be optimized if using alpha pass -#define SetUidField(name, value) if (typeid(T) == typeid(PixelShaderUid)) {out.GetUidData().name = value; }; -#define OR_UidField(name, value) if (typeid(T) == typeid(PixelShaderUid)) {out.GetUidData().name |= value; }; - if (typeid(T) == typeid(PixelShaderCode)) - { +#define SetUidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name = value; }; +#define OR_UidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name |= value; }; + out.SetBuffer(text); + if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - out.SetBuffer(text); - } + /// text[sizeof(text) - 1] = 0x7C; // canary unsigned int numStages = bpmem.genMode.numtevstages + 1; @@ -669,7 +667,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u /// if (text[sizeof(text) - 1] != 0x7C) /// PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); - setlocale(LC_NUMERIC, ""); // restore locale + if (out.GetBuffer() != NULL) + setlocale(LC_NUMERIC, ""); // restore locale } diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 273d82ea7c..1aceafa17d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ 
b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -46,9 +46,9 @@ static float s_constant_cache[C_PENVCONST_END*4]; inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) { - if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 && - s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4) - return; +// if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 && +// s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4) +// return; g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4); s_constant_cache[const_number*4] = f1; @@ -59,9 +59,9 @@ inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float inline void SetPSConstant4fv(unsigned int const_number, const float *f) { - if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] && - s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3]) - return; +// if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] && +// s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3]) +// return; g_renderer->SetPSConstant4fv(const_number, f); s_constant_cache[const_number*4] = f[0]; @@ -72,11 +72,11 @@ inline void SetPSConstant4fv(unsigned int const_number, const float *f) inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) { - for (unsigned int i = 0; i < 4*count; ++i) - if (s_constant_cache[const_number*4+i] != f[i]) - break; - else if (i == 4*count-1) - return; +// for (unsigned int i = 0; i < 4*count; ++i) +// if (s_constant_cache[const_number*4+i] != f[i]) +// break; +// else if (i == 4*count-1) +// return; g_renderer->SetMultiPSConstant4fv(const_number, count, f); for (unsigned int i = 0; i < 4*count; ++i) @@ -149,7 +149,7 @@ void 
PixelShaderManager::SetConstants(u32 components) { for (int i = 0; i < 8; ++i) { - if (s_nTexDimsChanged & (1< instead }; template @@ -122,7 +122,8 @@ public: inline bool ConstantIsUsed(unsigned int index) { - return constant_usage[index]; + return true; +// return constant_usage[index]; } private: std::vector constant_usage; // TODO: Is vector appropriate here? diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 88fe24f627..869b84a4ed 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -17,7 +17,6 @@ #include #include -#include #include "NativeVertexFormat.h" @@ -85,14 +84,11 @@ template void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { #undef SetUidField -#define SetUidField(name, value) if (typeid(T) == typeid(VertexShaderUid)) {out.GetUidData().name = value; }; +#define SetUidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name = value; }; - if (typeid(T) == typeid(VertexShaderCode)) - { - out.SetBuffer(text); + out.SetBuffer(text); + if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, "C"); // Reset locale for compilation - } - // text[sizeof(text) - 1] = 0x7C; // canary @@ -523,7 +519,7 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) /// if (text[sizeof(text) - 1] != 0x7C) /// PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); - if (typeid(T) == typeid(VertexShaderCode)) + if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, ""); // restore locale } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 79bcc1abbd..188145ebcf 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -201,9 +201,9 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen 
return &last_entry->shader; } } - + last_uid = uid; - + // Check if shader is already in cache PCache::iterator iter = pshaders.find(uid); if (iter != pshaders.end()) @@ -215,17 +215,17 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen last_entry->shader.Bind(); return &last_entry->shader; } - + // Make an entry in the table PCacheEntry& newentry = pshaders[uid]; last_entry = &newentry; newentry.in_cache = 0; - + VertexShaderCode vcode; PixelShaderCode pcode; GenerateVertexShaderCode(vcode, components, API_OPENGL); GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components); - + if (g_ActiveConfig.bEnableShaderDebugging) { newentry.shader.strvprog = vcode.GetBuffer(); @@ -260,7 +260,7 @@ bool ProgramShaderCache::CompileShader ( SHADER& shader, const char* vcode, cons { GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode); GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode); - + if(!vsid || !psid) { glDeleteShader(vsid); From 9eccd56ef08d1fd2d8a1e3b1d82d0c6f70347db2 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 20:59:03 +0100 Subject: [PATCH 26/54] PixelShaderGen: Some cleanups. 
--- Source/Core/VideoCommon/Src/BPMemory.h | 5 ++ .../Core/VideoCommon/Src/PixelShaderGen.cpp | 47 ++++++++----------- .../VideoCommon/Src/PixelShaderManager.cpp | 1 + 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index 1a29439b36..5154e2f5b0 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -157,6 +157,11 @@ #define TEVALPHAARG_KONST 6 #define TEVALPHAARG_ZERO 7 +#define GX_TEVPREV 0 +#define GX_TEVREG0 1 +#define GX_TEVREG1 2 +#define GX_TEVREG2 3 + #define ALPHACMP_NEVER 0 #define ALPHACMP_LESS 1 #define ALPHACMP_EQUAL 2 diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 49b77274a8..982f139fea 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -269,7 +269,9 @@ const char *WriteLocation(API_TYPE ApiType) template void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { + // TODO: Where does this TODO belong again...? // TODO: Can be optimized if using alpha pass + #define SetUidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name = value; }; #define OR_UidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name |= value; }; out.SetBuffer(text); @@ -915,7 +917,6 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0) { - // TODO: WTF? 
out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) { @@ -971,26 +972,19 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); RegisterStates[cc.dest].AuxStored = false; -/* if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0) - { + if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0) out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); - // TODO: 11 bit signed overflow.. - } - if (cc.d == TEVCOLORARG_C1 || cc.d == TEVCOLORARG_A1 || ac.d == TEVALPHAARG_A1) - { - out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); - // TODO: 11 bit signed overflow.. - } - if (cc.d == TEVCOLORARG_C2 || cc.d == TEVCOLORARG_A2 || ac.d == TEVALPHAARG_A2) - { - out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); - // TODO: 11 bit signed overflow.. - }*/ - // TODO: Are there enums for this? - if (cc.dest >= 1 && cc.dest <= 3) + if (cc.d == TEVCOLORARG_C1 || cc.d == TEVCOLORARG_A1 || ac.d == TEVALPHAARG_A1) + out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); + + if (cc.d == TEVCOLORARG_C2 || cc.d == TEVCOLORARG_A2 || ac.d == TEVALPHAARG_A2) + out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); + + if (cc.dest >= GX_TEVREG0 && cc.dest <= GX_TEVREG2) out.SetConstantsUsed(C_COLORS+cc.dest, C_COLORS+cc.dest); - if (ac.dest >= 1 && ac.dest <= 3) + + if (ac.dest >= GX_TEVREG0 && ac.dest <= GX_TEVREG2) out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); out.Write("// color combine\n"); @@ -1103,14 +1097,14 @@ void SampleTexture(T& out, const char *destination, const char *texcoords, const static const char *tevAlphaFuncsTable[] = { - "(false)", //ALPHACMP_NEVER 0, TODO: Not safe? 
- "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1 - "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2 - "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3 - "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4 - "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5 - "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6 - "(true)" //ALPHACMP_ALWAYS 7 + "(false)", // NEVER + "(prev.a <= %s - (0.25f/255.0f))", // LESS + "(abs( prev.a - %s ) < (0.5f/255.0f))", // EQUAL + "(prev.a < %s + (0.25f/255.0f))", // LEQUAL + "(prev.a >= %s + (0.25f/255.0f))", // GREATER + "(abs( prev.a - %s ) >= (0.5f/255.0f))", // NEQUAL + "(prev.a > %s - (0.25f/255.0f))", // GEQUAL + "(true)" // ALWAYS }; static const char *tevAlphaFunclogicTable[] = @@ -1132,7 +1126,6 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, out.SetConstantsUsed(C_ALPHA, C_ALPHA); - // using discard then return works the same in cg and dx9 but not in dx11 out.Write("\tif(!( "); diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 1aceafa17d..f63a1e59c6 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -419,6 +419,7 @@ void PixelShaderManager::SetPSTextureDims(int texid) // This one is high in profiles (0.5%). // TODO: Move conversion out, only store the raw color value // and update it when the shader constant is set, only. +// TODO: Conversion should be checked in the context of tev_fixes.. void PixelShaderManager::SetColorChanged(int type, int num, bool high) { float *pf = &lastRGBAfull[type][num][0]; From e31c2aa6013d6a95358251610c15820a34e65a35 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 21:53:57 +0100 Subject: [PATCH 27/54] ShaderGen: Cleanup uid data writing. 
--- .../Core/VideoCommon/Src/LightingShaderGen.h | 50 +++---- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 141 +++++++++--------- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 38 ++--- 3 files changed, 116 insertions(+), 113 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index a58809942a..e8b30d86a8 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -24,18 +24,17 @@ #include "NativeVertexFormat.h" #include "XFMemory.h" -// T.uid_data needs to have a struct named lighting_uid -template -void GenerateLightShader(T& object, int index, int litchan_index, const char* lightsName, int coloralpha) +// uid_data needs to have a struct named lighting_uid +template +void GenerateLightShader(T& object, UidDataT& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) { -#define SetUidField(name, value) if (&object.GetUidData() != NULL) { object.GetUidData().name = value; }; const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; const char* swizzle = "xyzw"; if (coloralpha == 1 ) swizzle = "xyz"; else if (coloralpha == 2 ) swizzle = "w"; - SetUidField(lit_chans[litchan_index].attnfunc, chan.attnfunc); - SetUidField(lit_chans[litchan_index].diffusefunc, chan.diffusefunc); + uid_data.lit_chans[litchan_index].attnfunc = chan.attnfunc; + uid_data.lit_chans[litchan_index].diffusefunc = chan.diffusefunc; if (!(chan.attnfunc & 1)) { // atten disabled switch (chan.diffusefunc) { @@ -94,8 +93,8 @@ void GenerateLightShader(T& object, int index, int litchan_index, const char* li // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps -template -void GenerateLightingShader(T& object, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +template +void GenerateLightingShader(T& object, UidDataT& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) { @@ -104,7 +103,7 @@ void GenerateLightingShader(T& object, int components, const char* materialsName object.Write("{\n"); - SetUidField(lit_chans[j].matsource, xfregs.color[j].matsource); + uid_data.lit_chans[j].matsource = xfregs.color[j].matsource; if (color.matsource) {// from vertex if (components & (VB_HAS_COL0 << j)) object.Write("mat = %s%d;\n", inColorName, j); @@ -116,9 +115,9 @@ void GenerateLightingShader(T& object, int components, const char* materialsName else // from color object.Write("mat = %s[%d];\n", materialsName, j+2); - SetUidField(lit_chans[j].enablelighting, xfregs.color[j].enablelighting); + uid_data.lit_chans[j].enablelighting = xfregs.color[j].enablelighting; if (color.enablelighting) { - SetUidField(lit_chans[j].ambsource, xfregs.color[j].ambsource); + uid_data.lit_chans[j].ambsource = 
xfregs.color[j].ambsource; if (color.ambsource) { // from vertex if (components & (VB_HAS_COL0<(object, i, j, lightsName, 3); + GenerateLightShader(object, uid_data, i, j, lightsName, 3); } } } @@ -198,9 +197,9 @@ void GenerateLightingShader(T& object, int components, const char* materialsName for (int i = 0; i < 8; ++i) { if (!(mask&(1<(object, i, j, lightsName, 1); + GenerateLightShader(object, uid_data, i, j, lightsName, 1); if (!(mask&(1<(object, i, j+2, lightsName, 2); + GenerateLightShader(object, uid_data, i, j+2, lightsName, 2); } } else if (color.enablelighting || alpha.enablelighting) @@ -210,17 +209,16 @@ void GenerateLightingShader(T& object, int components, const char* materialsName const int lit_index = color.enablelighting ? j : (j+2); int coloralpha = color.enablelighting ? 1 : 2; - SetUidField(lit_chans[lit_index].light_mask, workingchannel.GetFullLightMask()); + uid_data.lit_chans[lit_index].light_mask = workingchannel.GetFullLightMask(); for (int i = 0; i < 8; ++i) { if (workingchannel.GetFullLightMask() & (1<(object, i, lit_index, lightsName, coloralpha); + GenerateLightShader(object, uid_data, i, lit_index, lightsName, coloralpha); } } object.Write("%s%d = mat * saturate(lacc);\n", dest, j); object.Write("}\n"); } } -#undef SetUidField #endif // _LIGHTINGSHADERGEN_H_ diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 982f139fea..d9d45902fe 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -37,10 +37,10 @@ // output is given by .outreg // tevtemp is set according to swapmodetables and -template static void WriteStage(char *&p, int n, API_TYPE ApiType); +template static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType); template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -template static void 
WriteAlphaTest(T& out, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template static void WriteFog(T& out); +template static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +template static void WriteFog(T& out, pixel_shader_uid_data& uid_data); static const char *tevKSelTableC[] = // KCSEL { @@ -272,13 +272,15 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u // TODO: Where does this TODO belong again...? // TODO: Can be optimized if using alpha pass -#define SetUidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name = value; }; -#define OR_UidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name |= value; }; + // Non-uid template parameters will write to the dummy data (=> gets optimized out) + pixel_shader_uid_data dummy_data; + pixel_shader_uid_data& uid_data = (&out.GetUidData() != NULL) ? out.GetUidData() : dummy_data; + out.SetBuffer(text); if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, "C"); // Reset locale for compilation -/// text[sizeof(text) - 1] = 0x7C; // canary + text[sizeof(text) - 1] = 0x7C; // canary unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; @@ -289,12 +291,12 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", numStages, numTexgen/*, bpmem.genMode.numindstages*/); -// SetUidField(components, components); // TODO: Enable once per pixel lighting is implemented again - SetUidField(dstAlphaMode, dstAlphaMode); +// uid_data.components = components; // TODO: Enable once per pixel lighting is implemented again + uid_data.dstAlphaMode = dstAlphaMode; - SetUidField(genMode.numindstages, bpmem.genMode.numindstages); - SetUidField(genMode.numtevstages, bpmem.genMode.numtevstages); - SetUidField(genMode.numtexgens, 
bpmem.genMode.numtexgens); + uid_data.genMode.numindstages = bpmem.genMode.numindstages; + uid_data.genMode.numtevstages = bpmem.genMode.numtevstages; + uid_data.genMode.numtexgens = bpmem.genMode.numtexgens; if (ApiType == API_OPENGL) { @@ -461,7 +463,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - SetUidField(xfregs_numTexGen_numTexGens, xfregs.numTexGen.numTexGens); + uid_data.xfregs_numTexGen_numTexGens = xfregs.numTexGen.numTexGens; if (xfregs.numTexGen.numTexGens < 7) { out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); @@ -498,7 +500,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides - SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); + uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection; if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { out.Write("\tif (uv%d.z != 0.0f)", i); @@ -520,7 +522,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } } - SetUidField(nIndirectStagesUsed, nIndirectStagesUsed); + uid_data.nIndirectStagesUsed = nIndirectStagesUsed; for (u32 i = 0; i < bpmem.genMode.numindstages; ++i) { if (nIndirectStagesUsed & (1 << i)) @@ -531,23 +533,23 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u /// TODO: Cleanup... 
if (i == 0) { - SetUidField(tevindref.bc0, texcoord); - SetUidField(tevindref.bi0, texmap); + uid_data.tevindref.bc0 = texcoord; + uid_data.tevindref.bi0 = texmap; } else if (i == 1) { - SetUidField(tevindref.bc1, texcoord); - SetUidField(tevindref.bi1, texmap); + uid_data.tevindref.bc1 = texcoord; + uid_data.tevindref.bi1 = texmap; } else if (i == 2) { - SetUidField(tevindref.bc3, texcoord); - SetUidField(tevindref.bi2, texmap); + uid_data.tevindref.bc3 = texcoord; + uid_data.tevindref.bi2 = texmap; } else { - SetUidField(tevindref.bc4, texcoord); - SetUidField(tevindref.bi4, texmap); + uid_data.tevindref.bc4 = texcoord; + uid_data.tevindref.bi4 = texmap; } if (texcoord < numTexgen) { @@ -576,17 +578,17 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u // Uid fields for BuildSwapModeTable are set in WriteStage BuildSwapModeTable(); for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, i, ApiType); // build the equation for this stage + WriteStage(out, uid_data, i, ApiType); // build the equation for this stage if (numStages) { // The results of the last texenv stage are put onto the screen, // regardless of the used destination register - SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); // TODO: These probably don't need to be set anymore here... - SetUidField(combiners[numStages-1].alphaC.dest, bpmem.combiners[numStages-1].alphaC.dest); + uid_data.combiners[numStages-1].colorC.dest = bpmem.combiners[numStages-1].colorC.dest; // TODO: These probably don't need to be set anymore here... 
+ uid_data.combiners[numStages-1].alphaC.dest = bpmem.combiners[numStages-1].alphaC.dest; if(bpmem.combiners[numStages - 1].colorC.dest != 0) { -/// SetUidField(combiners[numStages-1].colorC.dest, bpmem.combiners[numStages-1].colorC.dest); +/// uid_data.combiners[numStages-1].colorC.dest = bpmem.combiners[numStages-1].colorC.dest; bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; out.Write("\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; @@ -603,9 +605,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("\tprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); - SetUidField(Pretest, Pretest); + uid_data.Pretest = Pretest; if (Pretest == AlphaTest::UNDETERMINED) - WriteAlphaTest(out, ApiType, dstAlphaMode, per_pixel_depth); + WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, per_pixel_depth); // the screen space depth value = far z + (clip z / clip w) * z range @@ -619,9 +621,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } // Note: depth textures are disabled if early depth test is enabled - SetUidField(ztex.op, bpmem.ztex2.op); - SetUidField(per_pixel_depth, per_pixel_depth); - SetUidField(fog.fsel, bpmem.fog.c_proj_fsel.fsel); + uid_data.ztex.op = bpmem.ztex2.op; + uid_data.per_pixel_depth = per_pixel_depth; + uid_data.fog.fsel = bpmem.fog.c_proj_fsel.fsel; // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; @@ -650,7 +652,7 
@@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } else { - WriteFog(out); + WriteFog(out, uid_data); out.Write("\tocol0 = prev;\n"); } @@ -666,8 +668,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } out.Write("}\n"); -/// if (text[sizeof(text) - 1] != 0x7C) -/// PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); + + if (text[sizeof(text) - 1] != 0x7C) + PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, ""); // restore locale @@ -720,7 +723,7 @@ static const char *TEVCMPAlphaOPTable[16] = template -static void WriteStage(T& out, int n, API_TYPE ApiType) +static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -732,14 +735,14 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) out.Write("// TEV stage %d\n", n); - OR_UidField(bHasIndStage, bHasIndStage << n); - if (n < 8) { OR_UidField(tevorders_n_texcoord1, texcoord << (3 * n)); } - else OR_UidField(tevorders_n_texcoord2, texcoord << (3 * n - 24)); + uid_data.bHasIndStage |= bHasIndStage << n; + if (n < 8) { uid_data.tevorders_n_texcoord1 |= texcoord << (3 * n); } + else uid_data.tevorders_n_texcoord2 |= texcoord << (3 * n - 24); if (bHasIndStage) { - OR_UidField(tevind_n_bs, bpmem.tevind[n].bs << (2*n)); - OR_UidField(tevind_n_bt, bpmem.tevind[n].bt << (2*n)); - OR_UidField(tevind_n_fmt, bpmem.tevind[n].fmt << (2*n)); + uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n); + uid_data.tevind_n_bt |= bpmem.tevind[n].bt << (2*n); + uid_data.tevind_n_fmt |= bpmem.tevind[n].fmt << (2*n); out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords @@ -754,14 +757,14 @@ static void WriteStage(T& out, int n, API_TYPE 
ApiType) out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias - if (n < 8) { OR_UidField(tevind_n_bias1, bpmem.tevind[n].bias << (3*n)); } /// XXX: brackets? - else OR_UidField(tevind_n_bias2, bpmem.tevind[n].bias << (3*n - 24)); + if (n < 8) { uid_data.tevind_n_bias1 |= bpmem.tevind[n].bias << (3*n); } /// XXX: brackets? + else uid_data.tevind_n_bias2 |= bpmem.tevind[n].bias << (3*n - 24); if (bpmem.tevind[n].bias != ITB_NONE ) out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale - if (n < 8) { OR_UidField(tevind_n_mid1, bpmem.tevind[n].mid << (4*n)); } /// XXX: brackets? - else OR_UidField(tevind_n_mid2, bpmem.tevind[n].mid << (4*n - 32)); + if (n < 8) { uid_data.tevind_n_mid1 |= bpmem.tevind[n].mid << (4*n); } /// XXX: brackets? + else uid_data.tevind_n_mid2 |= bpmem.tevind[n].mid << (4*n - 32); if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) @@ -795,12 +798,12 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) // Wrapping // --------- - if (n < 8) { OR_UidField(tevind_n_sw1, bpmem.tevind[n].sw << (3 * n)); } - else OR_UidField(tevind_n_sw2, bpmem.tevind[n].sw << (3 * n - 24)); - if (n < 8) { OR_UidField(tevind_n_tw1, bpmem.tevind[n].tw << (3 * n)); } - else OR_UidField(tevind_n_tw2, bpmem.tevind[n].tw << (3 * n - 24)); + if (n < 8) { uid_data.tevind_n_sw1 |= bpmem.tevind[n].sw << (3 * n); } + else uid_data.tevind_n_sw2 |= bpmem.tevind[n].sw << (3 * n - 24); + if (n < 8) { uid_data.tevind_n_tw1 |= bpmem.tevind[n].tw << (3 * n); } + else uid_data.tevind_n_tw2 |= bpmem.tevind[n].tw << (3 * n - 24); - OR_UidField(tevind_n_fb_addprev, bpmem.tevind[n].fb_addprev << n); + uid_data.tevind_n_fb_addprev |= bpmem.tevind[n].fb_addprev << n; // wrap S if (bpmem.tevind[n].sw == ITW_OFF) @@ -827,8 +830,8 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) 
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - SetUidField(combiners[n].colorC.hex, cc.hex&0xFFFFFF); - SetUidField(combiners[n].alphaC.hex, ac.hex&0xFFFFFF); + uid_data.combiners[n].colorC.hex = cc.hex & 0xFFFFFF; + uid_data.combiners[n].alphaC.hex = ac.hex & 0xFFFFFF; if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC @@ -838,10 +841,10 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap1 << (i*2)); - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap1 << (i*2 + 1)); - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap2 << (i*2 + 16)); - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap2 << (i*2 + 17)); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap1 << (i*2); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap1 << (i*2 + 1); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap2 << (i*2 + 16); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap2 << (i*2 + 17); char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); @@ -861,10 +864,10 @@ static void WriteStage(T& out, int n, API_TYPE ApiType) } const int i = bpmem.combiners[n].alphaC.tswap; - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap1 << (i*2)); - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap1 << (i*2 + 1)); - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2 ].swap2 << (i*2 + 16)); - OR_UidField(tevksel_n_swap, bpmem.tevksel[i*2+1].swap2 << (i*2 + 17)); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap1 << (i*2); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap1 << (i*2 + 1); + uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap2 << (i*2 + 16); + 
uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap2 << (i*2 + 17); char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); @@ -1116,7 +1119,7 @@ static const char *tevAlphaFunclogicTable[] = }; template -static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) +static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) { static const char *alphaRef[2] = { @@ -1129,9 +1132,9 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, // using discard then return works the same in cg and dx9 but not in dx11 out.Write("\tif(!( "); - SetUidField(alpha_test.comp0, bpmem.alpha_test.comp0); - SetUidField(alpha_test.logic, bpmem.alpha_test.comp1); - SetUidField(alpha_test.logic, bpmem.alpha_test.logic); + uid_data.alpha_test.comp0 = bpmem.alpha_test.comp0; + uid_data.alpha_test.logic = bpmem.alpha_test.comp1; + uid_data.alpha_test.logic = bpmem.alpha_test.logic; // Lookup the first component from the alpha function table int compindex = bpmem.alpha_test.comp0; @@ -1160,7 +1163,7 @@ static void WriteAlphaTest(T& out, API_TYPE ApiType, DSTALPHA_MODE dstAlphaMode, // when the alpha test fail. This is not a correct implementation because // even if the depth test fails the fragment could be alpha blended, but // we don't have a choice. 
- SetUidField(alpha_test.use_zcomploc_hack, bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable); + uid_data.alpha_test.use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable; if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) { out.Write("\t\tdiscard;\n"); @@ -1184,13 +1187,13 @@ static const char *tevFogFuncsTable[] = }; template -static void WriteFog(T& out) +static void WriteFog(T& out, pixel_shader_uid_data& uid_data) { - SetUidField(fog.fsel, bpmem.fog.c_proj_fsel.fsel); + uid_data.fog.fsel = bpmem.fog.c_proj_fsel.fsel; if(bpmem.fog.c_proj_fsel.fsel == 0) return; // no Fog - SetUidField(fog.proj, bpmem.fog.c_proj_fsel.proj); + uid_data.fog.proj = bpmem.fog.c_proj_fsel.proj; out.SetConstantsUsed(C_FOG, C_FOG+1); if (bpmem.fog.c_proj_fsel.proj == 0) @@ -1209,7 +1212,7 @@ static void WriteFog(T& out) // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust // this is completely theoretical as the real hardware seems to use a table intead of calculating the values. - SetUidField(fog.RangeBaseEnabled, bpmem.fogRange.Base.Enabled); + uid_data.fog.RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if(bpmem.fogRange.Base.Enabled) { out.SetConstantsUsed(C_FOG+2, C_FOG+2); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 869b84a4ed..5ed1a73569 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -83,14 +83,15 @@ extern const char *WriteLocation(API_TYPE api_type); template void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { -#undef SetUidField -#define SetUidField(name, value) if (&out.GetUidData() != NULL) {out.GetUidData().name = value; }; + // Non-uid template parameters will write to the dummy data (=> gets optimized out) + vertex_shader_uid_data dummy_data; + vertex_shader_uid_data& uid_data = (&out.GetUidData() != NULL) ? 
out.GetUidData() : dummy_data; out.SetBuffer(text); if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, "C"); // Reset locale for compilation -// text[sizeof(text) - 1] = 0x7C; // canary + text[sizeof(text) - 1] = 0x7C; // canary _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); @@ -122,8 +123,8 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) GenerateVSOutputStruct(out, components, api_type); - SetUidField(numTexGens, xfregs.numTexGen.numTexGens); - SetUidField(components, components); + uid_data.numTexGens = xfregs.numTexGen.numTexGens; + uid_data.components = components; if(api_type == API_OPENGL) { @@ -262,7 +263,7 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) "float3 ldir, h;\n" "float dist, dist2, attn;\n"); - SetUidField(numColorChans, xfregs.numChan.numColorChans); + uid_data.numColorChans = xfregs.numChan.numColorChans; if (xfregs.numChan.numColorChans == 0) { if (components & VB_HAS_COL0) @@ -272,7 +273,7 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) } // TODO: This probably isn't necessary if pixel lighting is enabled. 
- GenerateLightingShader(out, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + GenerateLightingShader(out, uid_data, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); if (xfregs.numChan.numColorChans < 2) { @@ -297,7 +298,7 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) out.Write("{\n"); out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); - SetUidField(texMtxInfo[i].sourcerow, xfregs.texMtxInfo[i].sourcerow); + uid_data.texMtxInfo[i].sourcerow = xfregs.texMtxInfo[i].sourcerow; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); @@ -332,21 +333,21 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) } // first transformation - SetUidField(texMtxInfo[i].texgentype, xfregs.texMtxInfo[i].texgentype); + uid_data.texMtxInfo[i].texgentype = xfregs.texMtxInfo[i].texgentype; switch (texinfo.texgentype) { case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) { // transform the light dir into tangent space - SetUidField(texMtxInfo[i].embosslightshift, xfregs.texMtxInfo[i].embosslightshift); - SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); + uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift; + uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift; out.Write("ldir = normalize(" I_LIGHTS"[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); } else { _assert_(0); // should have normals - SetUidField(texMtxInfo[i].embosssourceshift, xfregs.texMtxInfo[i].embosssourceshift); + uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift; out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); } @@ -361,7 +362,7 @@ void GenerateVertexShader(T& out, u32 
components, API_TYPE api_type) break; case XF_TEXGEN_REGULAR: default: - SetUidField(texMtxInfo[i].projection, xfregs.texMtxInfo[i].projection); + uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection; if (components & (VB_HAS_TEXMTXIDX0< Date: Fri, 29 Mar 2013 22:24:49 +0100 Subject: [PATCH 28/54] ShaderGen: More interface cleanups. Less wtfs :) --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 3 ++- Source/Core/VideoCommon/Src/PixelShaderGen.h | 4 ++-- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 20 ++++++++----------- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 7 ++++--- Source/Core/VideoCommon/Src/VertexShaderGen.h | 4 ++-- .../Src/LineGeometryShader.cpp | 2 +- .../Src/PointGeometryShader.cpp | 2 +- .../Src/ProgramShaderCache.cpp | 6 +++--- 8 files changed, 23 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index d9d45902fe..e534e2a6ca 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -274,7 +274,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; - pixel_shader_uid_data& uid_data = (&out.GetUidData() != NULL) ? out.GetUidData() : dummy_data; + pixel_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) + ? 
out.template GetUidData() : dummy_data; out.SetBuffer(text); if (out.GetBuffer() != NULL) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 4f9cba0c37..4881999206 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -190,8 +190,8 @@ struct pixel_shader_uid_data }; typedef ShaderUid PixelShaderUid; -typedef ShaderCode PixelShaderCode; -typedef ShaderConstantProfile PixelShaderConstantProfile; +typedef ShaderCode PixelShaderCode; // TODO: Obsolete +typedef ShaderConstantProfile PixelShaderConstantProfile; // TODO: Obsolete void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index a2616e0a6c..a904a8e635 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -25,7 +25,6 @@ #include -template class ShaderGeneratorInterface { public: @@ -33,11 +32,13 @@ public: const char* GetBuffer() { return NULL; } void SetBuffer(char* buffer) { } inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} - uid_data& GetUidData() { return *(uid_data*)NULL; } // TODO: can be moved out, just make this GetUidData instead + + template + uid_data& GetUidData() { return *(uid_data*)NULL; } }; template -class ShaderUid : public ShaderGeneratorInterface +class ShaderUid : public ShaderGeneratorInterface { public: ShaderUid() @@ -69,7 +70,8 @@ public: return false; } - inline uid_data& GetUidData() { return data; } + template + inline T& GetUidData() override { return data; } private: union @@ -79,9 +81,7 @@ private: }; }; -// Needs to be a template for hacks... 
-template -class ShaderCode : public ShaderGeneratorInterface +class ShaderCode : public ShaderGeneratorInterface { public: ShaderCode() : buf(NULL), write_ptr(NULL) @@ -105,15 +105,11 @@ private: char* write_ptr; }; -template -class ShaderConstantProfile : public ShaderGeneratorInterface +class ShaderConstantProfile : public ShaderGeneratorInterface { public: ShaderConstantProfile(int num_constants) { constant_usage.resize(num_constants); } - // has room for optimization (if it matters at all...) - void NumConstants() { return constant_usage.size(); } - inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) { for (unsigned int i = first_index; i < last_index+1; ++i) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 5ed1a73569..e27ced7b39 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -85,7 +85,8 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) vertex_shader_uid_data dummy_data; - vertex_shader_uid_data& uid_data = (&out.GetUidData() != NULL) ? out.GetUidData() : dummy_data; + vertex_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) + ? 
out.template GetUidData() : dummy_data; out.SetBuffer(text); if (out.GetBuffer() != NULL) @@ -535,7 +536,7 @@ void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE GenerateVertexShader(object, components, api_type); } -void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type) +void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type) { - GenerateVSOutputStruct >(object, components, api_type); + GenerateVSOutputStruct(object, components, api_type); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index b6b176c806..33d27ecdf0 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -110,10 +110,10 @@ struct vertex_shader_uid_data }; typedef ShaderUid VertexShaderUid; -typedef ShaderCode VertexShaderCode; +typedef ShaderCode VertexShaderCode; // TODO: Obsolete.. void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type); void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type); -void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type); +void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type); #endif // GCOGL_VERTEXSHADER_H diff --git a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp index f22ebef0df..5d6b8eb5b5 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/LineGeometryShader.cpp @@ -183,7 +183,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth, { // Generate new shader. Warning: not thread-safe. 
static char buffer[16384]; - ShaderCode code; + ShaderCode code; code.SetBuffer(buffer); GenerateVSOutputStructForGS(code, components, API_D3D11); code.Write("\n%s", LINE_GS_COMMON); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp index 5976c09ab9..8f5300ff64 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PointGeometryShader.cpp @@ -177,7 +177,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize, { // Generate new shader. Warning: not thread-safe. static char buffer[16384]; - ShaderCode code; + ShaderCode code; code.SetBuffer(buffer); GenerateVSOutputStructForGS(code, components, API_D3D11); code.Write("\n%s", POINT_GS_COMMON); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 188145ebcf..81621d2d38 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -360,9 +360,9 @@ template UidT GetPartialUid(const SHADERUID& uid); template<> PixelShaderUid GetPartialUid(const SHADERUID& uid) { return uid.puid; } template<> VertexShaderUid GetPartialUid(const SHADERUID& uid) { return uid.vuid; } -template const std::string& GetShaderCode(const SHADER& shader); -template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strpprog; } -template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strvprog; } +template const std::string& GetShaderCode(const SHADER& shader); +template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strpprog; } +template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strvprog; } template void CheckForUidMismatch(const ProgramShaderCache::PCache& cache, CodeT& new_code, const UidT& new_uid) From 
cdddb26bbafce034143465da7d2e250cded5fd53 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 29 Mar 2013 22:29:37 +0100 Subject: [PATCH 29/54] Apparently override is less fun than I thought. --- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index a904a8e635..eabd6f7c07 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -71,7 +71,7 @@ public: } template - inline T& GetUidData() override { return data; } + inline T& GetUidData() { return data; } private: union From 248d56d930d8d6f7cae04efc0c1521c83676c074 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 31 Mar 2013 20:55:57 +0200 Subject: [PATCH 30/54] ShaderGen: Small optimization. --- .../Core/VideoCommon/Src/LightingShaderGen.h | 4 +-- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 32 +++++++++++++++++-- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 28 +++++++--------- Source/Core/VideoCommon/Src/VertexShaderGen.h | 4 ++- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index e8b30d86a8..40a8fb2732 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -26,7 +26,7 @@ // uid_data needs to have a struct named lighting_uid template -void GenerateLightShader(T& object, UidDataT& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) +static void GenerateLightShader(T& object, UidDataT& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) { const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; const char* swizzle = "xyzw"; @@ -94,7 +94,7 @@ void GenerateLightShader(T& object, UidDataT& uid_data, int index, int litchan_i // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps template -void GenerateLightingShader(T& object, UidDataT& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +static void GenerateLightingShader(T& object, UidDataT& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) { diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index eabd6f7c07..ef88d411ea 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -21,10 +21,11 @@ #include #include #include -#include "CommonTypes.h" - #include +#include "CommonTypes.h" +#include "VideoCommon.h" + class ShaderGeneratorInterface { public: @@ -125,4 +126,31 @@ private: std::vector constant_usage; // TODO: Is vector appropriate here? 
}; +template +static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num) +{ + if (ApiType == API_OPENGL) + return; // Nothing to do here + + object.Write(" : register(%s%d)", prefix, num); +} + +template +static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos) +{ + if (using_ubos) + return; + + object.Write("uniform "); +} + +template +static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name) +{ + WriteLocation(object, api_type, using_ubos); + object.Write("%s %s ", type, name); + WriteRegister(object, api_type, "c", num); + object.Write(";\n"); +} + #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index e27ced7b39..44bad82471 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -29,7 +29,7 @@ static char text[16768]; template -void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) +static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1) { object.Write(" %s %s", type, name); if (var_index != -1) @@ -48,7 +48,7 @@ void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, // TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? 
template -void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) +static void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) { object.Write("struct VS_OUTPUT {\n"); DefineVSOutputStructMember(object, api_type, "float4", "pos", -1, "POSITION"); @@ -76,12 +76,8 @@ void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) object.Write("};\n"); } -// TODO: Seriously? -.- -extern const char *WriteRegister(API_TYPE api_type, const char *prefix, const u32 num); -extern const char *WriteLocation(API_TYPE api_type); - template -void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) +static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) vertex_shader_uid_data dummy_data; @@ -108,16 +104,16 @@ void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("layout(std140) uniform VSBlock {\n"); - out.Write("%sfloat4 " I_POSNORMALMATRIX"[6] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_POSNORMALMATRIX)); - out.Write("%sfloat4 " I_PROJECTION"[4] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_PROJECTION)); - out.Write("%sfloat4 " I_MATERIALS"[4] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_MATERIALS)); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PROJECTION, "float4", I_PROJECTION"[4]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_MATERIALS, "float4", I_MATERIALS"[4]"); out.Write("struct Light { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; };\n"); - out.Write("%sLight " I_LIGHTS"[8] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_LIGHTS)); - 
out.Write("%sfloat4 " I_TEXMATRICES"[24] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_TEXMATRICES)); // also using tex matrices - out.Write("%sfloat4 " I_TRANSFORMMATRICES"[64] %s;\n", WriteLocation(api_type),WriteRegister(api_type, "c", C_TRANSFORMMATRICES)); - out.Write("%sfloat4 " I_NORMALMATRICES"[32] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_NORMALMATRICES)); - out.Write("%sfloat4 " I_POSTTRANSFORMMATRICES"[64] %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_POSTTRANSFORMMATRICES)); - out.Write("%sfloat4 " I_DEPTHPARAMS" %s;\n", WriteLocation(api_type), WriteRegister(api_type, "c", C_DEPTHPARAMS)); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_LIGHTS, "Light", I_LIGHTS"[8]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("};\n"); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index 33d27ecdf0..ea81aba468 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -76,7 +76,8 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 }, {I_DEPTHPARAMS, C_DEPTHPARAMS, 1 }, }; -// TODO: Need packing? 
+#pragma pack(4) + struct vertex_shader_uid_data { @@ -108,6 +109,7 @@ struct vertex_shader_uid_data u32 light_mask : 8; } lit_chans[4]; }; +#pragma pack() typedef ShaderUid VertexShaderUid; typedef ShaderCode VertexShaderCode; // TODO: Obsolete.. From f6d65a636ef9c8a75b410314b94384a2ae2d7e62 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 31 Mar 2013 23:29:33 +0200 Subject: [PATCH 31/54] ShaderGen: Fix per pixel lighting. --- .../Core/VideoCommon/Src/LightingShaderGen.h | 19 ++++++++----------- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 10 +++++----- Source/Core/VideoCommon/Src/PixelShaderGen.h | 2 ++ Source/Core/VideoCommon/Src/ShaderGenCommon.h | 13 +++++++++++++ .../Core/VideoCommon/Src/VertexShaderGen.cpp | 2 +- Source/Core/VideoCommon/Src/VertexShaderGen.h | 11 ++--------- Source/Core/VideoCommon/Src/VideoConfig.h | 2 +- 7 files changed, 32 insertions(+), 27 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index 40a8fb2732..ff19b737f4 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -18,15 +18,12 @@ #ifndef _LIGHTINGSHADERGEN_H_ #define _LIGHTINGSHADERGEN_H_ -#include - #include "ShaderGenCommon.h" #include "NativeVertexFormat.h" #include "XFMemory.h" -// uid_data needs to have a struct named lighting_uid -template -static void GenerateLightShader(T& object, UidDataT& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) +template +static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) { const LitChannel& chan = (litchan_index > 1) ? 
xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; const char* swizzle = "xyzw"; @@ -93,8 +90,8 @@ static void GenerateLightShader(T& object, UidDataT& uid_data, int index, int li // materials name is I_MATERIALS in vs and I_PMATERIALS in ps // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps -template -static void GenerateLightingShader(T& object, UidDataT& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +template +static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) { @@ -187,7 +184,7 @@ static void GenerateLightingShader(T& object, UidDataT& uid_data, int components { if (mask & (1<(object, uid_data, i, j, lightsName, 3); + GenerateLightShader(object, uid_data, i, j, lightsName, 3); } } } @@ -197,9 +194,9 @@ static void GenerateLightingShader(T& object, UidDataT& uid_data, int components for (int i = 0; i < 8; ++i) { if (!(mask&(1<(object, uid_data, i, j, lightsName, 1); + GenerateLightShader(object, uid_data, i, j, lightsName, 1); if (!(mask&(1<(object, uid_data, i, j+2, lightsName, 2); + GenerateLightShader(object, uid_data, i, j+2, lightsName, 2); } } else if (color.enablelighting || alpha.enablelighting) @@ -213,7 +210,7 @@ static void GenerateLightingShader(T& object, UidDataT& uid_data, int components for (int i = 0; i < 8; ++i) { if (workingchannel.GetFullLightMask() & (1<(object, uid_data, i, lit_index, lightsName, coloralpha); + GenerateLightShader(object, uid_data, i, lit_index, lightsName, coloralpha); } } object.Write("%s%d = mat * saturate(lacc);\n", dest, j); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index e534e2a6ca..ceb2230684 100644 --- 
a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -341,12 +341,12 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("\t%sfloat4 " I_INDTEXSCALE"[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXSCALE)); out.Write("\t%sfloat4 " I_INDTEXMTX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXMTX)); out.Write("\t%sfloat4 " I_FOG"[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG)); - + // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! out.Write("struct Light { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; };\n"); out.Write("\t%sLight " I_PLIGHTS"[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); out.Write("\t%sfloat4 " I_PMATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS)); - + if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("};\n"); @@ -366,7 +366,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("float4 colors_1 = colors_12;\n"); // compute window position if needed because binding semantic WPOS is not widely supported - // Let's set up attributes + // Let's set up attributes if (xfregs.numTexGen.numTexGens < 7) { for (int i = 0; i < 8; ++i) @@ -479,10 +479,10 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("\tfloat4 mat, lacc;\n" "\tfloat3 ldir, h;\n" "\tfloat dist, dist2, attn;\n"); -/// TODO + out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); -/// p = GenerateLightingShader(p, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); + GenerateLightingShader(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } if (numTexgen < 7) diff --git 
a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 4881999206..98afec2c5b 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -187,6 +187,8 @@ struct pixel_shader_uid_data u32 bHasIndStage : 16; u32 xfregs_numTexGen_numTexGens : 4; + + LightingUidData lighting; }; typedef ShaderUid PixelShaderUid; diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index ef88d411ea..4d41cd62f1 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -153,4 +153,17 @@ static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const object.Write(";\n"); } +struct LightingUidData +{ + struct + { + u32 matsource : 1; + u32 enablelighting : 1; + u32 ambsource : 1; + u32 diffusefunc : 2; + u32 attnfunc : 2; + u32 light_mask : 8; + } lit_chans[4]; +}; + #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 44bad82471..82e7a7fdce 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -270,7 +270,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) } // TODO: This probably isn't necessary if pixel lighting is enabled. 
- GenerateLightingShader(out, uid_data, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + GenerateLightingShader(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); if (xfregs.numChan.numColorChans < 2) { diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h index ea81aba468..9c44ba0448 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.h +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h @@ -79,7 +79,6 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 }, #pragma pack(4) struct vertex_shader_uid_data - { u32 components; u32 numColorChans : 2; @@ -100,14 +99,8 @@ struct vertex_shader_uid_data struct { u32 enabled : 1; } dualTexTrans; - struct { - u32 matsource : 1; - u32 enablelighting : 1; - u32 ambsource : 1; - u32 diffusefunc : 2; - u32 attnfunc : 2; - u32 light_mask : 8; - } lit_chans[4]; + + LightingUidData lighting; }; #pragma pack() diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 99fff0d167..442e95f9bf 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -135,7 +135,7 @@ struct VideoConfig int iAdapter; // Debugging - bool bEnableShaderDebugging; // TODO: Obsolete? + bool bEnableShaderDebugging; // Static config per API // TODO: Move this out of VideoConfig From f57b902d3369e1780d61283f0d04d0380923db2b Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 31 Mar 2013 23:53:46 +0200 Subject: [PATCH 32/54] PixelShaderGen: Cleanups. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 47 +++------------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 55 +++++++++++++++---- 2 files changed, 52 insertions(+), 50 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index ceb2230684..8d1d268197 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -531,27 +531,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u unsigned int texcoord = bpmem.tevindref.getTexCoord(i); unsigned int texmap = bpmem.tevindref.getTexMap(i); - /// TODO: Cleanup... - if (i == 0) - { - uid_data.tevindref.bc0 = texcoord; - uid_data.tevindref.bi0 = texmap; - } - else if (i == 1) - { - uid_data.tevindref.bc1 = texcoord; - uid_data.tevindref.bi1 = texmap; - } - else if (i == 2) - { - uid_data.tevindref.bc3 = texcoord; - uid_data.tevindref.bi2 = texmap; - } - else - { - uid_data.tevindref.bc4 = texcoord; - uid_data.tevindref.bi4 = texmap; - } + uid_data.tevindref.SetValues(i, texcoord, texmap); if (texcoord < numTexgen) { out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); @@ -585,11 +565,8 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u { // The results of the last texenv stage are put onto the screen, // regardless of the used destination register - uid_data.combiners[numStages-1].colorC.dest = bpmem.combiners[numStages-1].colorC.dest; // TODO: These probably don't need to be set anymore here... 
- uid_data.combiners[numStages-1].alphaC.dest = bpmem.combiners[numStages-1].alphaC.dest; if(bpmem.combiners[numStages - 1].colorC.dest != 0) { -/// uid_data.combiners[numStages-1].colorC.dest = bpmem.combiners[numStages-1].colorC.dest; bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; out.Write("\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; @@ -741,9 +718,9 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE else uid_data.tevorders_n_texcoord2 |= texcoord << (3 * n - 24); if (bHasIndStage) { - uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n); - uid_data.tevind_n_bt |= bpmem.tevind[n].bt << (2*n); - uid_data.tevind_n_fmt |= bpmem.tevind[n].fmt << (2*n); + uid_data.tevind_n.bs |= bpmem.tevind[n].bs << (2*n); + uid_data.tevind_n.bt |= bpmem.tevind[n].bt << (2*n); + uid_data.tevind_n.fmt |= bpmem.tevind[n].fmt << (2*n); out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords @@ -758,14 +735,12 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias - if (n < 8) { uid_data.tevind_n_bias1 |= bpmem.tevind[n].bias << (3*n); } /// XXX: brackets? 
- else uid_data.tevind_n_bias2 |= bpmem.tevind[n].bias << (3*n - 24); + uid_data.tevind_n.Set_bias(n, bpmem.tevind[n].bias); if (bpmem.tevind[n].bias != ITB_NONE ) out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale - if (n < 8) { uid_data.tevind_n_mid1 |= bpmem.tevind[n].mid << (4*n); } /// XXX: brackets? - else uid_data.tevind_n_mid2 |= bpmem.tevind[n].mid << (4*n - 32); + uid_data.tevind_n.Set_mid(n, bpmem.tevind[n].mid); if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) @@ -798,13 +773,9 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE // --------- // Wrapping // --------- - - if (n < 8) { uid_data.tevind_n_sw1 |= bpmem.tevind[n].sw << (3 * n); } - else uid_data.tevind_n_sw2 |= bpmem.tevind[n].sw << (3 * n - 24); - if (n < 8) { uid_data.tevind_n_tw1 |= bpmem.tevind[n].tw << (3 * n); } - else uid_data.tevind_n_tw2 |= bpmem.tevind[n].tw << (3 * n - 24); - - uid_data.tevind_n_fb_addprev |= bpmem.tevind[n].fb_addprev << n; + uid_data.tevind_n.Set_sw(n, bpmem.tevind[n].sw); + uid_data.tevind_n.Set_tw(n, bpmem.tevind[n].tw); + uid_data.tevind_n.fb_addprev |= bpmem.tevind[n].fb_addprev << n; // wrap S if (bpmem.tevind[n].sw == ITW_OFF) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 98afec2c5b..3c6db3fa8f 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -99,23 +99,54 @@ struct pixel_shader_uid_data u32 bc3 : 3; u32 bi4 : 3; u32 bc4 : 3; + void SetValues(int index, u32 texcoord, u32 texmap) + { + if (index == 0) { bc0 = texcoord; bi0 = texmap; } + else if (index == 1) { bc1 = texcoord; bi1 = texmap; } + else if (index == 2) { bc3 = texcoord; bi2 = texmap; } + else if (index == 3) { bc4 = texcoord; bi4 = texmap; } + } } tevindref; u32 tevorders_n_texcoord1 : 24; // 8 x 3 bit u32 tevorders_n_texcoord2 
: 24; // 8 x 3 bit - u32 tevind_n_sw1 : 24; // 8 x 3 bit - u32 tevind_n_sw2 : 24; // 8 x 3 bit - u32 tevind_n_tw1 : 24; // 8 x 3 bit - u32 tevind_n_tw2 : 24; // 8 x 3 bit - u32 tevind_n_fb_addprev : 16; // 16 x 1 bit + struct + { + u32 sw1 : 24; // 8 x 3 bit + u32 sw2 : 24; // 8 x 3 bit + u32 tw1 : 24; // 8 x 3 bit + u32 tw2 : 24; // 8 x 3 bit + u32 fb_addprev : 16; // 16 x 1 bit + u32 bs : 32; // 16 x 2 bit + u32 fmt : 32; // 16 x 2 bit + u32 bt : 32; // 16 x 2 bit + u32 bias1 : 24; // 8 x 3 bit + u32 bias2 : 24; // 8 x 3 bit + u32 mid1 : 32; // 8 x 4 bit + u32 mid2 : 32; // 8 x 4 bit - u32 tevind_n_bs : 32; // 16 x 2 bit - u32 tevind_n_fmt : 32; // 16 x 2 bit - u32 tevind_n_bt : 32; // 16 x 2 bit - u32 tevind_n_bias1 : 24; // 8 x 3 bit - u32 tevind_n_bias2 : 24; // 8 x 3 bit - u32 tevind_n_mid1 : 32; // 8 x 4 bit - u32 tevind_n_mid2 : 32; // 8 x 4 bit + // NOTE: These assume that the affected bits are zero before calling + void Set_sw(int index, u32 val) + { + if (index < 8) sw1 |= val << (3*index); + else sw2 |= val << (3*index - 24); + } + void Set_tw(int index, u32 val) + { + if (index < 8) tw1 |= val << (3*index); + else tw2 |= val << (3*index - 24); + } + void Set_bias(int index, u32 val) + { + if (index < 8) bias1 |= val << (3*index); + else bias2 |= val << (3*index - 24); + } + void Set_mid(int index, u32 val) + { + if (index < 8) mid1 |= val << (4*index); + else mid2 |= val << (4*index - 32); + } + } tevind_n; u32 tevksel_n_swap : 32; // 8 x 2 bit (swap1) + 8 x 2 bit (swap2) struct From a60e1a3db8baae58b759b8b905436a1e52fb81e2 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 31 Mar 2013 23:57:39 +0200 Subject: [PATCH 33/54] ShaderGen: Remove some TODOs and fix an issue with per pixel lighting. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 6 +----- Source/Core/VideoCommon/Src/VertexShaderGen.cpp | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 8d1d268197..69c096f002 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -269,9 +269,6 @@ const char *WriteLocation(API_TYPE ApiType) template void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { - // TODO: Where does this TODO belong again...? - // TODO: Can be optimized if using alpha pass - // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; pixel_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) @@ -292,9 +289,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", numStages, numTexgen/*, bpmem.genMode.numindstages*/); -// uid_data.components = components; // TODO: Enable once per pixel lighting is implemented again uid_data.dstAlphaMode = dstAlphaMode; - uid_data.genMode.numindstages = bpmem.genMode.numindstages; uid_data.genMode.numtevstages = bpmem.genMode.numtevstages; uid_data.genMode.numtexgens = bpmem.genMode.numtexgens; @@ -482,6 +477,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); + uid_data.components = components; GenerateLightingShader(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 82e7a7fdce..48b09a9eac 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ 
b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -46,7 +46,6 @@ static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* } } -// TODO: Check if something goes wrong if the cached shaders used pixel lighting but it's disabled later?? template static void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type) { From ec5f596b3175e06c8d27ae2053213813db3693ef Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 12:17:28 +0200 Subject: [PATCH 34/54] VertexShaderGen: More per-pixel-lighting fixes. --- Source/Core/VideoCommon/Src/VertexShaderGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 48b09a9eac..452ee7fd3f 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -420,7 +420,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) out.Write("o.tex3.w = o.pos.w;\n"); } -/* if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) + if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { if (xfregs.numTexGen.numTexGens < 7) { out.Write("o.Normal = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n"); @@ -438,7 +438,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) if (components & VB_HAS_COL1) out.Write("o.colors_1 = color1;\n"); - }*/ + } //write the true depth value, if the game uses depth textures pixel shaders will override with the correct values //if not early z culling will improve speed From ec08914905ca1c4fe3b2456a9cc037a3f7eedd49 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 12:54:22 +0200 Subject: [PATCH 35/54] Move Shader UID mismatch checking to VideoCommon. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 7 ++ Source/Core/VideoCommon/Src/PixelShaderGen.h | 1 - Source/Core/VideoCommon/Src/ShaderGenCommon.h | 47 ++++++++++++++ .../Core/VideoCommon/Src/VertexShaderGen.cpp | 7 ++ .../Src/ProgramShaderCache.cpp | 64 ------------------- 5 files changed, 61 insertions(+), 65 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 69c096f002..2d9121856c 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -1207,6 +1207,13 @@ static void WriteFog(T& out, pixel_shader_uid_data& uid_data) void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { GeneratePixelShader(object, dstAlphaMode, ApiType, components); + + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_OPENGL, components); + CheckForUidMismatch(code, object); + } } void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 3c6db3fa8f..001ab53f0b 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -226,7 +226,6 @@ typedef ShaderUid PixelShaderUid; typedef ShaderCode PixelShaderCode; // TODO: Obsolete typedef ShaderConstantProfile PixelShaderConstantProfile; // TODO: Obsolete - void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h 
b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 4d41cd62f1..07af93c295 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include "CommonTypes.h" #include "VideoCommon.h" @@ -166,4 +168,49 @@ struct LightingUidData } lit_chans[4]; }; +struct pixel_shader_uid_data; +struct vertex_shader_uid_data; + +typedef ShaderUid PixelShaderUid; +typedef ShaderUid VertexShaderUid; + +template +void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid) +{ + static std::map s_shaders; + static std::vector s_uids; + + bool uid_is_indexed = std::find(s_uids.begin(), s_uids.end(), new_uid) != s_uids.end(); + if (!uid_is_indexed) + { + s_uids.push_back(new_uid); + s_shaders[new_uid] = new_code.GetBuffer(); + } + else + { + // uid is already in the index => check if there's a shader with the same uid but different code + auto& old_code = s_shaders[new_uid]; + if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0) + { + static int num_failures = 0; + + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), + (typeid(UidT) == typeid(PixelShaderUid)) ? "p" : (typeid(UidT) == typeid(VertexShaderUid)) ? "v" : "o", + ++num_failures); + + // TODO: Should also dump uids + std::ofstream file; + OpenFStream(file, szTemp, std::ios_base::out); + file << "Old shader code:\n" << old_code; + file << "\n\nNew shader code:\n" << new_code.GetBuffer(); + file.close(); + + // TODO: Make this more idiot-proof + ERROR_LOG(VIDEO, "%s shader uid mismatch!", + (typeid(UidT) == typeid(PixelShaderUid)) ? "Pixel" : (typeid(UidT) == typeid(VertexShaderUid)) ? 
"Vertex" : "Other"); + } + } +} + #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 452ee7fd3f..7c1553890b 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -524,6 +524,13 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type) { GenerateVertexShader(object, components, api_type); + + if (g_ActiveConfig.bEnableShaderDebugging) + { + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_OPENGL); + CheckForUidMismatch(code, object); + } } void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index 81621d2d38..0d571c29a9 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -23,9 +23,6 @@ #include "ImageWrite.h" #include "Render.h" -#include -#include - namespace OGL { @@ -356,73 +353,12 @@ GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code ) return result; } -template UidT GetPartialUid(const SHADERUID& uid); -template<> PixelShaderUid GetPartialUid(const SHADERUID& uid) { return uid.puid; } -template<> VertexShaderUid GetPartialUid(const SHADERUID& uid) { return uid.vuid; } - -template const std::string& GetShaderCode(const SHADER& shader); -template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strpprog; } -template<> const std::string& GetShaderCode(const SHADER& shader) { return shader.strvprog; } - -template -void CheckForUidMismatch(const ProgramShaderCache::PCache& cache, CodeT& new_code, const UidT& new_uid) -{ - static std::map s_shaders; - static std::vector s_uids; - - 
bool uid_is_indexed = std::find(s_uids.begin(), s_uids.end(), new_uid) != s_uids.end(); - if (!uid_is_indexed) - { - s_uids.push_back(new_uid); - s_shaders[new_uid] = new_code.GetBuffer(); - } - else - { - // uid is already in the index => check if there's a shader with the same uid but different code - auto& old_code = s_shaders[new_uid]; - if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0) - { - static int num_failures = 0; - - char szTemp[MAX_PATH]; - sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), - (typeid(UidT) == typeid(PixelShaderUid)) ? "p" : (typeid(UidT) == typeid(VertexShaderUid)) ? "v" : "o", - ++num_failures); - - // TODO: Should also dump uids - std::ofstream file; - OpenFStream(file, szTemp, std::ios_base::out); - file << "Old shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code.GetBuffer(); - file.close(); - - // TODO: Make this more idiot-proof - ERROR_LOG(VIDEO, "%s shader uid mismatch!", - (typeid(UidT) == typeid(PixelShaderUid)) ? "Pixel" : (typeid(UidT) == typeid(VertexShaderUid)) ? "Vertex" : "Other"); - } - } -} - - void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components) { GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components); GetVertexShaderUid(uid->vuid, components, API_OPENGL); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - PixelShaderCode pcode; - VertexShaderCode vcode; - - GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components); - GenerateVertexShaderCode(vcode, components, API_OPENGL); - - CheckForUidMismatch(pshaders, pcode, uid->puid); - CheckForUidMismatch(pshaders, vcode, uid->vuid); - } } - ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void) { return *last_entry; From 31d2cab8d3a6344043e00731d9892670e490129d Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 12:55:42 +0200 Subject: [PATCH 36/54] Add ShaderGenCommon to vcproj file list. 
--- Source/Core/VideoCommon/VideoCommon.vcxproj | 1 + Source/Core/VideoCommon/VideoCommon.vcxproj.filters | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index f785cb5c84..1baf417797 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -250,6 +250,7 @@ + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 330b23d370..e988d34e12 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -258,6 +258,9 @@ Util + + Shader Generators + @@ -291,4 +294,4 @@ {e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6} - + \ No newline at end of file From fab4f1d0a50c2c4c0a4e7e41b6ebe58118438dc0 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 13:38:31 +0200 Subject: [PATCH 37/54] LightingShaderGen: Improve code flexibility. 
--- .../Core/VideoCommon/Src/LightingShaderGen.h | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index ff19b737f4..2a354aaf8d 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -22,6 +22,41 @@ #include "NativeVertexFormat.h" #include "XFMemory.h" +static const char* LightCol(const char* lightsName, unsigned int index, const char* swizzle) +{ + static char result[16]; + snprintf(result, sizeof(result), "%s[%d].col.%s", lightsName, index, swizzle); + return result; +} + +static const char* LightCosAtt(const char* lightsName, unsigned int index) +{ + static char result[16]; + snprintf(result, sizeof(result), "%s[%d].cosatt", lightsName, index); + return result; +} + +static const char* LightDistAtt(const char* lightsName, unsigned int index) +{ + static char result[16]; + snprintf(result, sizeof(result), "%s[%d].distatt", lightsName, index); + return result; +} + +static const char* LightPos(const char* lightsName, unsigned int index) +{ + static char result[16]; + snprintf(result, sizeof(result), "%s[%d].pos", lightsName, index); + return result; +} + +static const char* LightDir(const char* lightsName, unsigned int index) +{ + static char result[16]; + snprintf(result, sizeof(result), "%s[%d].dir", lightsName, index); + return result; +} + template static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) { @@ -36,13 +71,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, // atten disabled switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += %s[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + object.Write("lacc.%s += %s;\n", swizzle, LightCol(lightsName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - 
object.Write("ldir = normalize(%s[%d].pos.xyz - pos.xyz);\n", lightsName, index); - object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s[%d].col.%s;\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", lightsName, index, swizzle); + object.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(lightsName, index)); + object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s;\n", + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", LightCol(lightsName, index, swizzle)); break; default: _assert_(0); } @@ -51,33 +86,31 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, if (chan.attnfunc == 3) { // spot - object.Write("ldir = %s[%d].pos.xyz - pos.xyz;\n", lightsName, index); + object.Write("ldir = %s.xyz - pos.xyz;\n", LightPos(lightsName, index)); object.Write("dist2 = dot(ldir, ldir);\n" "dist = sqrt(dist2);\n" "ldir = ldir / dist;\n" - "attn = max(0.0f, dot(ldir, %s[%d].dir.xyz));\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s[%d].cosatt.xyz, float3(1.0f, attn, attn*attn))) / dot(%s[%d].distatt.xyz, float3(1.0f,dist,dist2));\n", lightsName, index, lightsName, index); + "attn = max(0.0f, dot(ldir, %s.xyz));\n", LightDir(lightsName, index)); + object.Write("attn = max(0.0f, dot(%s.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.xyz, float3(1.0f,dist,dist2));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index)); } else if (chan.attnfunc == 1) { // specular - object.Write("ldir = normalize(%s[%d].pos.xyz);\n", lightsName, index); - object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s[%d].dir.xyz)) : 0.0f;\n", lightsName, index); - object.Write("attn = max(0.0f, dot(%s[%d].cosatt.xyz, float3(1,attn,attn*attn))) / dot(%s[%d].distatt.xyz, float3(1,attn,attn*attn));\n", lightsName, index, lightsName, index); + object.Write("ldir = normalize(%s.xyz);\n", LightPos(lightsName, index)); + object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? 
max(0.0f, dot(_norm0, %s.xyz)) : 0.0f;\n", LightDir(lightsName, index)); + object.Write("attn = max(0.0f, dot(%s.xyz, float3(1,attn,attn*attn))) / dot(%s.xyz, float3(1,attn,attn*attn));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index)); } switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * %s[%d].col.%s;\n", swizzle, lightsName, index, swizzle); + object.Write("lacc.%s += attn * %s;\n", swizzle, LightCol(lightsName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s[%d].col.%s;\n", + object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s;\n", swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", - lightsName, - index, - swizzle); + LightCol(lightsName, index, swizzle)); break; default: _assert_(0); } From abde070f63f62a784dcb59fadd0e72013fa03acf Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 14:25:18 +0200 Subject: [PATCH 38/54] LightingShaderGen: Use a float4 array for lights instead of a struct (uniform management in the non-UBO path is a mess otherwise). Also fix a small bug (cf. revision 154c533e7632). 
--- .../Core/VideoCommon/Src/LightingShaderGen.h | 20 +++++++++---------- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 3 +-- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 2 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 5 ++--- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index 2a354aaf8d..b534d767cc 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -24,36 +24,36 @@ static const char* LightCol(const char* lightsName, unsigned int index, const char* swizzle) { - static char result[16]; - snprintf(result, sizeof(result), "%s[%d].col.%s", lightsName, index, swizzle); + static char result[32]; + snprintf(result, sizeof(result), "%s[5*%d].%s", lightsName, index, swizzle); return result; } static const char* LightCosAtt(const char* lightsName, unsigned int index) { - static char result[16]; - snprintf(result, sizeof(result), "%s[%d].cosatt", lightsName, index); + static char result[32]; + snprintf(result, sizeof(result), "%s[5*%d+1]", lightsName, index); return result; } static const char* LightDistAtt(const char* lightsName, unsigned int index) { - static char result[16]; - snprintf(result, sizeof(result), "%s[%d].distatt", lightsName, index); + static char result[32]; + snprintf(result, sizeof(result), "%s[5*%d+2]", lightsName, index); return result; } static const char* LightPos(const char* lightsName, unsigned int index) { - static char result[16]; - snprintf(result, sizeof(result), "%s[%d].pos", lightsName, index); + static char result[32]; + snprintf(result, sizeof(result), "%s[5*%d+3]", lightsName, index); return result; } static const char* LightDir(const char* lightsName, unsigned int index) { - static char result[16]; - snprintf(result, sizeof(result), "%s[%d].dir", lightsName, index); + static char result[32]; + snprintf(result, sizeof(result), "%s[5*%d+4]", lightsName, 
index); return result; } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 2d9121856c..4dbb56b320 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -338,8 +338,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u out.Write("\t%sfloat4 " I_FOG"[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG)); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! - out.Write("struct Light { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; };\n"); - out.Write("\t%sLight " I_PLIGHTS"[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); + out.Write("\t%sfloat4 " I_PLIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); out.Write("\t%sfloat4 " I_PMATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS)); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 07af93c295..8356b7defe 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 7c1553890b..52c3aeaf30 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -106,8 +106,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PROJECTION, "float4", I_PROJECTION"[4]"); 
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_MATERIALS, "float4", I_MATERIALS"[4]"); - out.Write("struct Light { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; };\n"); - DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_LIGHTS, "Light", I_LIGHTS"[8]"); + DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_LIGHTS, "float4", I_LIGHTS"[40]"); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]"); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]"); @@ -337,7 +336,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) // transform the light dir into tangent space uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift; uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift; - out.Write("ldir = normalize(" I_LIGHTS"[%d].pos.xyz - pos.xyz);\n", texinfo.embosslightshift); + out.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(I_LIGHTS, texinfo.embosslightshift)); out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); } else From e7a5847c304117c45cd1f9012101627043a58e95 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 14:44:09 +0200 Subject: [PATCH 39/54] ShaderGen: Build fix. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 2 +- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 7 +++---- Source/Core/VideoCommon/Src/VertexShaderGen.cpp | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 4dbb56b320..918b8237d6 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -1211,7 +1211,7 @@ void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_T { PixelShaderCode code; GeneratePixelShaderCode(code, dstAlphaMode, API_OPENGL, components); - CheckForUidMismatch(code, object); + CheckForUidMismatch(code, object, "Pixel", "p"); } } diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 8356b7defe..5ad20aa3c7 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -175,7 +175,7 @@ typedef ShaderUid PixelShaderUid; typedef ShaderUid VertexShaderUid; template -void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid) +void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix) { static std::map s_shaders; static std::vector s_uids; @@ -196,7 +196,7 @@ void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid) char szTemp[MAX_PATH]; sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), - (typeid(UidT) == typeid(PixelShaderUid)) ? "p" : (typeid(UidT) == typeid(VertexShaderUid)) ? "v" : "o", + dump_prefix, ++num_failures); // TODO: Should also dump uids @@ -207,8 +207,7 @@ void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid) file.close(); // TODO: Make this more idiot-proof - ERROR_LOG(VIDEO, "%s shader uid mismatch!", - (typeid(UidT) == typeid(PixelShaderUid)) ? "Pixel" : (typeid(UidT) == typeid(VertexShaderUid)) ? 
"Vertex" : "Other"); + ERROR_LOG(VIDEO, "%s shader uid mismatch!", shader_type); } } } diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 52c3aeaf30..294e363756 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -528,7 +528,7 @@ void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_ty { VertexShaderCode code; GenerateVertexShaderCode(code, components, API_OPENGL); - CheckForUidMismatch(code, object); + CheckForUidMismatch(code, object, "Vertex", "v"); } } From 7480f5dfd678bd5bb6b05b309155c81cfa2c42f5 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 10 Apr 2013 14:55:46 +0200 Subject: [PATCH 40/54] ShaderGenCommon: Clean up. --- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 5ad20aa3c7..e148fac54c 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -23,7 +23,6 @@ #include #include #include -#include #include "CommonTypes.h" #include "VideoCommon.h" @@ -168,12 +167,6 @@ struct LightingUidData } lit_chans[4]; }; -struct pixel_shader_uid_data; -struct vertex_shader_uid_data; - -typedef ShaderUid PixelShaderUid; -typedef ShaderUid VertexShaderUid; - template void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix) { From eef95fa4c5919bf93335e10dd61cd5a9fd6a5f6d Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Thu, 25 Apr 2013 13:30:41 +0200 Subject: [PATCH 41/54] ShaderGenCommon: Adding documentation. 
--- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index e148fac54c..4b3d16b8d7 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -27,18 +27,55 @@ #include "CommonTypes.h" #include "VideoCommon.h" +/** + * Common interface for classes that need to go through the shader generation path (GenerateVertexShader, GeneratePixelShader) + * In particular, this includes the shader code generator (ShaderCode). + * A different class (ShaderUid) can be used to uniquely identify each ShaderCode object. + * More interesting things can be done with this, e.g. ShaderConstantProfile checks what shader constants are being used. This can be used to optimize buffer management. + * Each of the ShaderCode, ShaderUid and ShaderConstantProfile child classes only implement the subset of ShaderGeneratorInterface methods that are required for the specific tasks. + */ class ShaderGeneratorInterface { public: + /* + * Used when the shader generator would write a piece of ShaderCode. + * Can be used like printf. + * @note In the ShaderCode implementation, this does indeed write the parameter string to an internal buffer. However, you're free to do whatever you like with the parameter. + */ void Write(const char* fmt, ...) {} + + /* + * Returns a read pointer to the internal buffer. + * @note When implementing this method in a child class, you likely want to return the argument of the last SetBuffer call here + * @note SetBuffer() should be called before using GetBuffer(). + */ const char* GetBuffer() { return NULL; } + + /* + * Can be used to give the object a place to write to. This should be called before using Write(). 
+ * @param buffer pointer to a char buffer that the object can write to + */ void SetBuffer(char* buffer) { } + + /* + * Tells us that a specific constant range (including last_index) is being used by the shader + */ inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {} + /* + * Returns a pointer to an internally stored object of the uid_data type. + * @warning since most child classes use the default implementation you shouldn't access this directly without adding precautions against NULL access (e.g. via adding a dummy structure, cf. the vertex/pixel shader generators) + */ template uid_data& GetUidData() { return *(uid_data*)NULL; } }; +/** + * Shader UID class used to uniquely identify the ShaderCode output written in the shader generator. + * uid_data can be any struct of parameters that uniquely identify each shader code output. + * Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint. + * Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw u32 values from a union. + */ template class ShaderUid : public ShaderGeneratorInterface { @@ -59,9 +96,10 @@ public: return memcmp(this->values, obj.values, sizeof(values)) != 0; } - // TODO: Store last frame used and order by that? makes much more sense anyway... + // determines the storage order inside STL containers bool operator < (const ShaderUid& obj) const { + // TODO: Store last frame used and order by that? makes much more sense anyway... 
for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i) { if (this->values[i] < obj.values[i]) @@ -107,6 +145,9 @@ private: char* write_ptr; }; +/** + * Generates a shader constant profile which can be used to query which constants are used in a shader + */ class ShaderConstantProfile : public ShaderGeneratorInterface { public: @@ -120,6 +161,7 @@ public: inline bool ConstantIsUsed(unsigned int index) { + // TODO: Not ready for usage yet return true; // return constant_usage[index]; } @@ -154,6 +196,10 @@ static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const object.Write(";\n"); } +/** + * Common uid data used for shader generators that use lighting calculations. + * Expected to be stored as a member called "lighting". + */ struct LightingUidData { struct @@ -167,9 +213,13 @@ struct LightingUidData } lit_chans[4]; }; +/** + * Checks if there has been + */ template void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix) { + // TODO: Might be sensitive to config changes static std::map s_shaders; static std::vector s_uids; From adab4e37f7dcfe9a60819a2ed1f8576b8a57a505 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 29 Apr 2013 19:37:32 +0200 Subject: [PATCH 42/54] Fix some mistakes from the master merge; some cleanups. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 10 +++++----- Source/Core/VideoCommon/Src/PixelShaderGen.h | 10 ++-------- Source/Core/VideoCommon/Src/VertexShaderGen.cpp | 7 +------ 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 4187c44d0f..1bf4ca25d8 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -254,7 +254,7 @@ const char *WriteLocation(API_TYPE ApiType) } template -void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) +static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; @@ -333,9 +333,9 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u if (ApiType == API_OPENGL) { - out.Write("COLOROUT(ocol0);\n"); + out.Write("COLOROUT(ocol0)\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) - out.Write("COLOROUT(ocol1);\n"); + out.Write("COLOROUT(ocol1)\n"); if (per_pixel_depth) out.Write("#define depth gl_FragDepth\n"); @@ -572,7 +572,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u if (Pretest == AlphaTest::UNDETERMINED) WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, per_pixel_depth); - + // the screen space depth value = far z + (clip z / clip w) * z range if(ApiType == API_OPENGL || ApiType == API_D3D11) out.Write("float zCoord = rawpos.z;\n"); @@ -584,7 +584,7 @@ void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u } // Note: depth textures are disabled if early depth test is enabled - uid_data.ztex.op = bpmem.ztex2.op; + uid_data.ztex_op = bpmem.ztex2.op; uid_data.per_pixel_depth = per_pixel_depth; uid_data.fog.fsel = bpmem.fog.c_proj_fsel.fsel; diff --git 
a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 024bc170b5..783f50a301 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -86,7 +86,7 @@ struct pixel_shader_uid_data u32 bc3 : 3; u32 bi4 : 3; u32 bc4 : 3; - void SetValues(int index, u32 texcoord, u32 texmap) + inline void SetValues(int index, u32 texcoord, u32 texmap) { if (index == 0) { bc0 = texcoord; bi0 = texmap; } else if (index == 1) { bc1 = texcoord; bi1 = texmap; } @@ -180,7 +180,6 @@ struct pixel_shader_uid_data u32 comp0 : 3; u32 comp1 : 3; u32 logic : 2; - // TODO: ref??? u32 use_zcomploc_hack : 1; } alpha_test; @@ -194,12 +193,7 @@ struct pixel_shader_uid_data u32 hex : 4; } fog; - union { - struct { - u32 op : 2; - }; - u32 hex : 2; - } ztex; + u32 ztex_op : 2; u32 per_pixel_depth : 1; u32 bHasIndStage : 16; diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index eac041d5c9..81b8c79553 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -80,11 +80,6 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11); - u32 lightMask = 0; - if (xfregs.numChan.numColorChans > 0) - lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask(); - if (xfregs.numChan.numColorChans > 1) - lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask(); // uniforms if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) @@ -112,7 +107,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) { out.Write("ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); if (components & VB_HAS_POSMTXIDX) - out.Write("ATTRIN float posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); + 
out.Write("ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); if (components & VB_HAS_NRM0) out.Write("ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); if (components & VB_HAS_NRM1) From 77dae3496a45b8f885119fc65da93cb0699c0bb2 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 29 Apr 2013 19:52:12 +0200 Subject: [PATCH 43/54] PixelShaderGen: Cleanups. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 46 ++++++------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 1bf4ca25d8..b9df2ebe8e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -233,26 +233,6 @@ static void BuildSwapModeTable() } } -const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num) -{ - if (ApiType == API_OPENGL) - return ""; // Nothing to do here - - static char result[64]; - sprintf(result, " : register(%s%d)", prefix, num); - return result; -} - -const char *WriteLocation(API_TYPE ApiType) -{ - if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) - return ""; - - static char result[64]; - sprintf(result, "uniform "); - return result; -} - template static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { @@ -273,8 +253,8 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api bool per_pixel_depth = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable; out.Write("//Pixel Shader for TEV stages\n"); - out.Write("//%i TEV stages, %i texgens, XXX IND stages\n", - numStages, numTexgen/*, bpmem.genMode.numindstages*/); + out.Write("//%i TEV stages, %i texgens, %i IND stages\n", + numStages, numTexgen, bpmem.genMode.numindstages); uid_data.dstAlphaMode = dstAlphaMode; uid_data.genMode.numindstages = bpmem.genMode.numindstages; @@ -299,7 +279,7 @@ static void 
GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api { // Declare samplers for (int i = 0; i < 8; ++i) - out.Write("%s samp%d %s;\n", (ApiType == API_D3D11) ? "sampler" : "uniform sampler2D", i, WriteRegister(ApiType, "s", i)); + out.Write("%s samp%d : register(s%d);\n", (ApiType == API_D3D11) ? "sampler" : "uniform sampler2D", i, i); if (ApiType == API_D3D11) { @@ -315,18 +295,18 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("layout(std140) uniform PSBlock {\n"); - out.Write("\t%sfloat4 " I_COLORS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_COLORS)); // TODO: first element not used?? - out.Write("\t%sfloat4 " I_KCOLORS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_KCOLORS)); - out.Write("\t%sfloat4 " I_ALPHA"[1] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ALPHA)); // TODO: Why is this an array...-.- - out.Write("\t%sfloat4 " I_TEXDIMS"[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXDIMS)); - out.Write("\t%sfloat4 " I_ZBIAS"[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ZBIAS)); - out.Write("\t%sfloat4 " I_INDTEXSCALE"[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXSCALE)); - out.Write("\t%sfloat4 " I_INDTEXMTX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXMTX)); - out.Write("\t%sfloat4 " I_FOG"[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG)); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_COLORS, "float4", I_COLORS"[4]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_KCOLORS, "float4", I_KCOLORS"[4]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_ALPHA, "float4", I_ALPHA"[1]"); // TODO: Why is this an array...-.- + DeclareUniform(out, ApiType, 
g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_ZBIAS, "float4", I_ZBIAS"[2]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_INDTEXMTX, "float4", I_INDTEXMTX"[6]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_FOG, "float4", I_FOG"[3]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! - out.Write("\t%sfloat4 " I_PLIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS)); - out.Write("\t%sfloat4 " I_PMATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS)); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PLIGHTS, "float4", I_PLIGHTS"[40]"); + DeclareUniform(out, ApiType, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PMATERIALS, "float4", I_PMATERIALS"[4]"); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("};\n"); From 02afec507622d2fcf705f9147de21f081f77aaa5 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 29 Apr 2013 21:00:39 +0200 Subject: [PATCH 44/54] Polish shader uid checking. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 9 +- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 83 +++++++++++++------ .../Core/VideoCommon/Src/VertexShaderGen.cpp | 7 -- .../Plugin_VideoDX11/Src/PixelShaderCache.cpp | 10 ++- .../Plugin_VideoDX11/Src/PixelShaderCache.h | 2 + .../Src/VertexShaderCache.cpp | 9 ++ .../Plugin_VideoDX11/Src/VertexShaderCache.h | 2 + .../Plugin_VideoDX9/Src/PixelShaderCache.cpp | 8 ++ .../Plugin_VideoDX9/Src/PixelShaderCache.h | 2 + .../Plugin_VideoDX9/Src/VertexShaderCache.cpp | 9 ++ .../Plugin_VideoDX9/Src/VertexShaderCache.h | 3 + .../Src/ProgramShaderCache.cpp | 16 ++++ .../Plugin_VideoOGL/Src/ProgramShaderCache.h | 3 + 13 files changed, 120 insertions(+), 43 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index b9df2ebe8e..179560ddfa 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -392,7 +392,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { - for (int i = 0; i < numTexgen; ++i) + for (unsigned int i = 0; i < numTexgen; ++i) out.Write(",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i); out.Write(",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen); if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) @@ -1190,13 +1190,6 @@ static void WriteFog(T& out, pixel_shader_uid_data& uid_data) void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { GeneratePixelShader(object, dstAlphaMode, ApiType, components); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - PixelShaderCode code; - GeneratePixelShaderCode(code, dstAlphaMode, API_OPENGL, components); - CheckForUidMismatch(code, object, "Pixel", "p"); - } } void 
GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index 4b3d16b8d7..ebf2ae1b87 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -113,6 +113,9 @@ public: template inline T& GetUidData() { return data; } + const uid_data& GetUidData() const { return data; } + size_t GetUidDataSize() const { return sizeof(values); } + private: union { @@ -217,42 +220,68 @@ struct LightingUidData * Checks if there has been */ template -void CheckForUidMismatch(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix) +class UidChecker { - // TODO: Might be sensitive to config changes - static std::map s_shaders; - static std::vector s_uids; - - bool uid_is_indexed = std::find(s_uids.begin(), s_uids.end(), new_uid) != s_uids.end(); - if (!uid_is_indexed) +public: + void Invalidate() { - s_uids.push_back(new_uid); - s_shaders[new_uid] = new_code.GetBuffer(); + m_shaders.clear(); + m_uids.clear(); } - else + + void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix) { - // uid is already in the index => check if there's a shader with the same uid but different code - auto& old_code = s_shaders[new_uid]; - if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0) + bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end(); + if (!uid_is_indexed) { - static int num_failures = 0; + m_uids.push_back(new_uid); + m_shaders[new_uid] = new_code.GetBuffer(); + } + else + { + // uid is already in the index => check if there's a shader with the same uid but different code + auto& old_code = m_shaders[new_uid]; + if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0) + { + static int num_failures = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, 
"%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), - dump_prefix, - ++num_failures); + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), + dump_prefix, + ++num_failures); - // TODO: Should also dump uids - std::ofstream file; - OpenFStream(file, szTemp, std::ios_base::out); - file << "Old shader code:\n" << old_code; - file << "\n\nNew shader code:\n" << new_code.GetBuffer(); - file.close(); + // TODO: Should also dump uids + std::ofstream file; + OpenFStream(file, szTemp, std::ios_base::out); + file << "Old shader code:\n" << old_code; + file << "\n\nNew shader code:\n" << new_code.GetBuffer(); + file << "\n\nShader uid:\n"; + for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i) + { + u32 value = ((u32*)&new_uid.GetUidData())[i]; + if ((i % 4) == 0) + { + unsigned int last_value = (i+3 < new_uid.GetUidDataSize()-1) ? i+3 : new_uid.GetUidDataSize(); + file << std::setfill(' ') << std::dec; + file << "Values " << std::setw(2) << i << " - " << last_value << ": "; + } - // TODO: Make this more idiot-proof - ERROR_LOG(VIDEO, "%s shader uid mismatch!", shader_type); + file << std::setw(8) << std::setfill('0') << std::hex << value << std::setw(1); + if ((i % 4) < 3) + file << ' '; + else + file << std::endl; + } + file.close(); + + ERROR_LOG(VIDEO, "%s shader uid mismatch! 
See %s for details", shader_type, szTemp); + } } } -} + +private: + std::map m_shaders; + std::vector m_uids; +}; #endif // _SHADERGENCOMMON_H diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 81b8c79553..6cbf273097 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -538,13 +538,6 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type) void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type) { GenerateVertexShader(object, components, api_type); - - if (g_ActiveConfig.bEnableShaderDebugging) - { - VertexShaderCode code; - GenerateVertexShaderCode(code, components, API_OPENGL); - CheckForUidMismatch(code, object, "Vertex", "v"); - } } void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp index 9db0a53d13..0adf82fef9 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp @@ -29,6 +29,7 @@ namespace DX11 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry; PixelShaderUid PixelShaderCache::last_uid; +UidChecker PixelShaderCache::pixel_uid_checker; LinearDiskCache g_ps_disk_cache; @@ -412,7 +413,8 @@ void PixelShaderCache::Clear() { for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) iter->second.Destroy(); - PixelShaders.clear(); + PixelShaders.clear(); + pixel_uid_checker.Invalidate(); last_entry = NULL; } @@ -450,6 +452,12 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) { PixelShaderUid uid; GetPixelShaderUid(uid, dstAlphaMode, API_D3D11, components); + if (g_ActiveConfig.bEnableShaderDebugging) + { + 
PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components); + pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); + } // Check if the shader is already set if (last_entry) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h index ace3a9ac6a..dee930ccfe 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.h @@ -52,6 +52,8 @@ private: static PSCache PixelShaders; static const PSCacheEntry* last_entry; static PixelShaderUid last_uid; + + static UidChecker pixel_uid_checker; }; } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp index 19b276ddb6..2754267b09 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.cpp @@ -25,6 +25,7 @@ namespace DX11 { VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; VertexShaderUid VertexShaderCache::last_uid; +UidChecker VertexShaderCache::vertex_uid_checker; static ID3D11VertexShader* SimpleVertexShader = NULL; static ID3D11VertexShader* ClearVertexShader = NULL; @@ -174,6 +175,7 @@ void VertexShaderCache::Clear() for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) iter->second.Destroy(); vshaders.clear(); + vertex_uid_checker.Invalidate(); last_entry = NULL; } @@ -197,6 +199,13 @@ bool VertexShaderCache::SetShader(u32 components) { VertexShaderUid uid; GetVertexShaderUid(uid, components, API_D3D11); + if (g_ActiveConfig.bEnableShaderDebugging) + { + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D11); + vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); + } + if (last_entry) { if (uid == last_uid) diff --git 
a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h index 04f74bc055..b80dbcd7b1 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexShaderCache.h @@ -59,6 +59,8 @@ private: static VSCache vshaders; static const VSCacheEntry* last_entry; static VertexShaderUid last_uid; + + static UidChecker vertex_uid_checker; }; } // namespace DX11 diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index eb47614ae5..bbb46e4d56 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -31,6 +31,7 @@ namespace DX9 PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry; PixelShaderUid PixelShaderCache::last_uid; +UidChecker PixelShaderCache::pixel_uid_checker; static LinearDiskCache g_ps_disk_cache; static std::set unique_shaders; @@ -284,6 +285,7 @@ void PixelShaderCache::Clear() for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) iter->second.Destroy(); PixelShaders.clear(); + pixel_uid_checker.Invalidate(); last_entry = NULL; } @@ -322,6 +324,12 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? 
API_D3D9_SM20 : API_D3D9_SM30; PixelShaderUid uid; GetPixelShaderUid(uid, dstAlphaMode, API_D3D9, components); + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode code; + GeneratePixelShaderCode(code, dstAlphaMode, API_D3D9, components); + pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); + } // Check if the shader is already set if (last_entry) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index aadd2c02f7..733a68233a 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -44,6 +44,8 @@ private: static PSCache PixelShaders; static const PSCacheEntry *last_entry; static PixelShaderUid last_uid; + static UidChecker pixel_uid_checker; + static void Clear(); public: diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 9223533b3a..2d11368a6d 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -26,6 +26,7 @@ namespace DX9 VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; VertexShaderUid VertexShaderCache::last_uid; +UidChecker VertexShaderCache::vertex_uid_checker; #define MAX_SSAA_SHADERS 3 @@ -150,6 +151,7 @@ void VertexShaderCache::Clear() for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) iter->second.Destroy(); vshaders.clear(); + vertex_uid_checker.Invalidate(); last_entry = NULL; } @@ -176,6 +178,13 @@ bool VertexShaderCache::SetShader(u32 components) { VertexShaderUid uid; GetVertexShaderUid(uid, components, API_D3D9); + if (g_ActiveConfig.bEnableShaderDebugging) + { + VertexShaderCode code; + GenerateVertexShaderCode(code, components, API_D3D9); + vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v"); + } + 
if (last_entry) { if (uid == last_uid) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h index 88c295c435..32fbea92be 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h @@ -38,6 +38,9 @@ private: static VSCache vshaders; static const VSCacheEntry *last_entry; static VertexShaderUid last_uid; + + static UidChecker vertex_uid_checker; + static void Clear(); public: diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp index bab52073fb..732c63ac67 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.cpp @@ -29,6 +29,8 @@ static GLuint CurrentProgram = 0; ProgramShaderCache::PCache ProgramShaderCache::pshaders; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry; SHADERUID ProgramShaderCache::last_uid; +UidChecker ProgramShaderCache::pixel_uid_checker; +UidChecker ProgramShaderCache::vertex_uid_checker; static char s_glsl_header[1024] = ""; @@ -351,6 +353,17 @@ void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, { GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components); GetVertexShaderUid(uid->vuid, components, API_OPENGL); + + if (g_ActiveConfig.bEnableShaderDebugging) + { + PixelShaderCode pcode; + GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components); + pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); + + VertexShaderCode vcode; + GenerateVertexShaderCode(vcode, components, API_OPENGL); + vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v"); + } } ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void) @@ -448,6 +461,9 @@ void ProgramShaderCache::Shutdown(void) iter->second.Destroy(); pshaders.clear(); + pixel_uid_checker.Invalidate(); + 
vertex_uid_checker.Invalidate(); + if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) { delete s_buffer; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h index 9cd9a9b12c..30428ad6de 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/ProgramShaderCache.h @@ -106,6 +106,9 @@ private: static PCacheEntry* last_entry; static SHADERUID last_uid; + static UidChecker pixel_uid_checker; + static UidChecker vertex_uid_checker; + static GLintptr s_vs_data_size; static GLintptr s_ps_data_size; static GLintptr s_vs_data_offset; From 9cb263ad48b1b5f52f69e84b534807427110b145 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 29 Apr 2013 21:19:48 +0200 Subject: [PATCH 45/54] PixelShaderGen: Set some uid fields that I missed before. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 19 ++++++++++------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 21 ++++++++++++++++++- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 179560ddfa..2be42bcf43 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -790,10 +790,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap1 << (i*2); - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap1 << (i*2 + 1); - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap2 << (i*2 + 16); - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap2 << (i*2 + 17); + uid_data.tevksel[i*2 ].swap1 = bpmem.tevksel[i*2 ].swap1; + uid_data.tevksel[i*2+1].swap1 = bpmem.tevksel[i*2+1].swap1; + uid_data.tevksel[i*2 ].swap2 = bpmem.tevksel[i*2 ].swap2; + 
uid_data.tevksel[i*2+1].swap2 = bpmem.tevksel[i*2+1].swap2; char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); @@ -813,13 +813,14 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE } const int i = bpmem.combiners[n].alphaC.tswap; - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap1 << (i*2); - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap1 << (i*2 + 1); - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2 ].swap2 << (i*2 + 16); - uid_data.tevksel_n_swap |= bpmem.tevksel[i*2+1].swap2 << (i*2 + 17); + uid_data.tevksel[i*2 ].swap1 = bpmem.tevksel[i*2 ].swap1; + uid_data.tevksel[i*2+1].swap1 = bpmem.tevksel[i*2+1].swap1; + uid_data.tevksel[i*2 ].swap2 = bpmem.tevksel[i*2 ].swap2; + uid_data.tevksel[i*2+1].swap2 = bpmem.tevksel[i*2+1].swap2; char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); + uid_data.tevindref.SetTexmap(i, texmap); SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); } else @@ -833,6 +834,8 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE { int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); + uid_data.tevksel[n/2].set_kcsel(n & 1, kc); + uid_data.tevksel[n/2].set_kasel(n & 1, ka); out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if(kc > 7 || ka > 7) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 783f50a301..2340b96af7 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -93,6 +93,13 @@ struct pixel_shader_uid_data else if (index == 2) { bc3 = texcoord; bi2 = texmap; } else if (index == 3) { bc4 = texcoord; bi4 = texmap; } } + inline void SetTexmap(int index, u32 texmap) + { + if (index == 0) { bi0 = 
texmap; } + else if (index == 1) { bi1 = texmap; } + else if (index == 2) { bi2 = texmap; } + else if (index == 3) { bi4 = texmap; } + } } tevindref; u32 tevorders_n_texcoord1 : 24; // 8 x 3 bit @@ -135,7 +142,19 @@ struct pixel_shader_uid_data } } tevind_n; - u32 tevksel_n_swap : 32; // 8 x 2 bit (swap1) + 8 x 2 bit (swap2) + struct + { + u32 swap1 : 2; + u32 swap2 : 2; + u32 kcsel0 : 5; + u32 kasel0 : 5; + u32 kcsel1 : 5; + u32 kasel1 : 5; + + void set_kcsel(int i, u32 value) { if (i) kcsel1 = value; else kcsel0 = value; } + void set_kasel(int i, u32 value) { if( i) kasel1 = value; else kasel0 = value; } + } tevksel[8]; + struct { union { From 32b78a85726eb1d82c461ceefa066fc723cf4413 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 1 May 2013 11:39:30 +0200 Subject: [PATCH 46/54] First steps of making the pixel shader uid struct more compact. --- Source/Core/VideoCommon/Src/BPMemory.h | 2 +- .../Core/VideoCommon/Src/LightingShaderGen.h | 30 +-- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 91 ++++--- Source/Core/VideoCommon/Src/PixelShaderGen.h | 246 ++++++++---------- Source/Core/VideoCommon/Src/ShaderGenCommon.h | 17 +- 5 files changed, 181 insertions(+), 205 deletions(-) diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index 0ae29a3022..5254ba7bd7 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -376,7 +376,7 @@ struct TevStageCombiner union TwoTevStageOrders { - struct + struct { u32 texmap0 : 3; // indirect tex stage texmap u32 texcoord0 : 3; diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index fde60be891..36bc52749a 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -54,8 +54,8 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, else if (coloralpha == 2) swizzle = "w"; - 
uid_data.lit_chans[litchan_index].attnfunc = chan.attnfunc; - uid_data.lit_chans[litchan_index].diffusefunc = chan.diffusefunc; + uid_data.attnfunc |= chan.attnfunc << (2*litchan_index); + uid_data.diffusefunc |= chan.diffusefunc << (2*litchan_index); if (!(chan.attnfunc & 1)) { // atten disabled @@ -124,7 +124,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com object.Write("{\n"); - uid_data.lit_chans[j].matsource = xfregs.color[j].matsource; + uid_data.matsource |= xfregs.color[j].matsource << j; if (color.matsource) // from vertex { if (components & (VB_HAS_COL0 << j)) @@ -139,10 +139,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com object.Write("mat = %s[%d];\n", materialsName, j+2); } - uid_data.lit_chans[j].enablelighting = xfregs.color[j].enablelighting; + uid_data.enablelighting |= xfregs.color[j].enablelighting << j; if (color.enablelighting) { - uid_data.lit_chans[j].ambsource = xfregs.color[j].ambsource; + uid_data.ambsource |= xfregs.color[j].ambsource << j; if (color.ambsource) // from vertex { if (components & (VB_HAS_COL0<() != NULL) ? 
out.template GetUidData() : dummy_data; + ERROR_LOG(VIDEO, "%lu", sizeof(pixel_shader_uid_data)); + out.SetBuffer(text); if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, "C"); // Reset locale for compilation @@ -257,9 +259,9 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api numStages, numTexgen, bpmem.genMode.numindstages); uid_data.dstAlphaMode = dstAlphaMode; - uid_data.genMode.numindstages = bpmem.genMode.numindstages; - uid_data.genMode.numtevstages = bpmem.genMode.numtevstages; - uid_data.genMode.numtexgens = bpmem.genMode.numtexgens; + uid_data.genMode_numindstages = bpmem.genMode.numindstages; + uid_data.genMode_numtevstages = bpmem.genMode.numtevstages; + uid_data.genMode_numtexgens = bpmem.genMode.numtexgens; if (ApiType == API_OPENGL) { @@ -466,7 +468,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides - uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection; + uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i; if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { out.Write("\tif (uv%d.z != 0.0f)", i); @@ -496,7 +498,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api unsigned int texcoord = bpmem.tevindref.getTexCoord(i); unsigned int texmap = bpmem.tevindref.getTexMap(i); - uid_data.tevindref.SetValues(i, texcoord, texmap); + uid_data.SetTevindrefValues(i, texcoord, texmap); if (texcoord < numTexgen) { out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); @@ -566,7 +568,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api // Note: depth textures are disabled if early depth test is enabled uid_data.ztex_op = bpmem.ztex2.op; uid_data.per_pixel_depth = per_pixel_depth; - uid_data.fog.fsel = bpmem.fog.c_proj_fsel.fsel; + uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; // depth texture can safely be 
ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; @@ -687,13 +689,12 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.Write("// TEV stage %d\n", n); uid_data.bHasIndStage |= bHasIndStage << n; - if (n < 8) { uid_data.tevorders_n_texcoord1 |= texcoord << (3 * n); } - else uid_data.tevorders_n_texcoord2 |= texcoord << (3 * n - 24); + uid_data.tevorders_n_texcoord |= texcoord << (3 * n); if (bHasIndStage) { - uid_data.tevind_n.bs |= bpmem.tevind[n].bs << (2*n); - uid_data.tevind_n.bt |= bpmem.tevind[n].bt << (2*n); - uid_data.tevind_n.fmt |= bpmem.tevind[n].fmt << (2*n); + uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n); + uid_data.tevind_n_bt |= bpmem.tevind[n].bt << (2*n); + uid_data.tevind_n_fmt |= bpmem.tevind[n].fmt << (2*n); out.Write("// indirect op\n"); // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords @@ -708,12 +709,12 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); // bias - uid_data.tevind_n.Set_bias(n, bpmem.tevind[n].bias); + uid_data.Set_tevind_bias(n, bpmem.tevind[n].bias); if (bpmem.tevind[n].bias != ITB_NONE ) out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale - uid_data.tevind_n.Set_mid(n, bpmem.tevind[n].mid); + uid_data.Set_tevind_mid(n, bpmem.tevind[n].mid); if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) @@ -750,9 +751,9 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE // --------- // Wrapping // --------- - uid_data.tevind_n.Set_sw(n, bpmem.tevind[n].sw); - uid_data.tevind_n.Set_tw(n, bpmem.tevind[n].tw); - uid_data.tevind_n.fb_addprev |= 
bpmem.tevind[n].fb_addprev << n; + uid_data.Set_tevind_sw(n, bpmem.tevind[n].sw); + uid_data.Set_tevind_tw(n, bpmem.tevind[n].tw); + uid_data.tevind_n_fb_addprev |= bpmem.tevind[n].fb_addprev << n; // wrap S if (bpmem.tevind[n].sw == ITW_OFF) @@ -779,8 +780,26 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - uid_data.combiners[n].colorC.hex = cc.hex & 0xFFFFFF; - uid_data.combiners[n].alphaC.hex = ac.hex & 0xFFFFFF; + uid_data.cc_n_d = cc.d; + uid_data.cc_n_c = cc.c; + uid_data.cc_n_b = cc.b; + uid_data.cc_n_a = cc.a; + uid_data.cc_n_bias = cc.bias; + uid_data.cc_n_op = cc.op; + uid_data.cc_n_clamp = cc.clamp; + uid_data.cc_n_shift = cc.shift; + uid_data.cc_n_dest = cc.dest; + uid_data.ac_n_rswap = ac.rswap; + uid_data.ac_n_tswap = ac.tswap; + uid_data.ac_n_d = ac.d; + uid_data.ac_n_c = ac.c; + uid_data.ac_n_b = ac.b; + uid_data.ac_n_a = ac.a; + uid_data.ac_n_bias = ac.bias; + uid_data.ac_n_op = ac.op; + uid_data.ac_n_clamp = ac.clamp; + uid_data.ac_n_shift = ac.shift; + uid_data.ac_n_dest = ac.dest; if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC @@ -790,10 +809,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; - uid_data.tevksel[i*2 ].swap1 = bpmem.tevksel[i*2 ].swap1; - uid_data.tevksel[i*2+1].swap1 = bpmem.tevksel[i*2+1].swap1; - uid_data.tevksel[i*2 ].swap2 = bpmem.tevksel[i*2 ].swap2; - uid_data.tevksel[i*2+1].swap2 = bpmem.tevksel[i*2+1].swap2; + uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); + uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); + uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); + uid_data.tevksel_n_swap2 = 
bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); @@ -813,14 +832,14 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE } const int i = bpmem.combiners[n].alphaC.tswap; - uid_data.tevksel[i*2 ].swap1 = bpmem.tevksel[i*2 ].swap1; - uid_data.tevksel[i*2+1].swap1 = bpmem.tevksel[i*2+1].swap1; - uid_data.tevksel[i*2 ].swap2 = bpmem.tevksel[i*2 ].swap2; - uid_data.tevksel[i*2+1].swap2 = bpmem.tevksel[i*2+1].swap2; + uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); + uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); + uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); + uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); - uid_data.tevindref.SetTexmap(i, texmap); + uid_data.SetTevindrefTexmap(i, texmap); SampleTexture(out, "textemp", "tevcoord", texswap, texmap, ApiType); } else @@ -834,8 +853,8 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE { int kc = bpmem.tevksel[n / 2].getKC(n & 1); int ka = bpmem.tevksel[n / 2].getKA(n & 1); - uid_data.tevksel[n/2].set_kcsel(n & 1, kc); - uid_data.tevksel[n/2].set_kasel(n & 1, ka); + uid_data.set_tevksel_kcsel(n/2, n & 1, kc); + uid_data.set_tevksel_kasel(n/2, n & 1, ka); out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if(kc > 7 || ka > 7) { @@ -1086,9 +1105,9 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE Api // using discard then return works the same in cg and dx9 but not in dx11 out.Write("\tif(!( "); - uid_data.alpha_test.comp0 = bpmem.alpha_test.comp0; - uid_data.alpha_test.logic = bpmem.alpha_test.comp1; - uid_data.alpha_test.logic = 
bpmem.alpha_test.logic; + uid_data.alpha_test_comp0 = bpmem.alpha_test.comp0; + uid_data.alpha_test_logic = bpmem.alpha_test.comp1; + uid_data.alpha_test_logic = bpmem.alpha_test.logic; // Lookup the first component from the alpha function table int compindex = bpmem.alpha_test.comp0; @@ -1117,7 +1136,7 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE Api // when the alpha test fail. This is not a correct implementation because // even if the depth test fails the fragment could be alpha blended, but // we don't have a choice. - uid_data.alpha_test.use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable; + uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable; if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) { out.Write("\t\tdiscard;\n"); @@ -1143,11 +1162,11 @@ static const char *tevFogFuncsTable[] = template static void WriteFog(T& out, pixel_shader_uid_data& uid_data) { - uid_data.fog.fsel = bpmem.fog.c_proj_fsel.fsel; + uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; if(bpmem.fog.c_proj_fsel.fsel == 0) return; // no Fog - uid_data.fog.proj = bpmem.fog.c_proj_fsel.proj; + uid_data.fog_proj = bpmem.fog.c_proj_fsel.proj; out.SetConstantsUsed(C_FOG, C_FOG+1); if (bpmem.fog.c_proj_fsel.proj == 0) @@ -1166,7 +1185,7 @@ static void WriteFog(T& out, pixel_shader_uid_data& uid_data) // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust // this is completely theoretical as the real hardware seems to use a table intead of calculating the values. 
- uid_data.fog.RangeBaseEnabled = bpmem.fogRange.Base.Enabled; + uid_data.fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (bpmem.fogRange.Base.Enabled) { out.SetConstantsUsed(C_FOG+2, C_FOG+2); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 2340b96af7..5ae3febfa3 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -55,162 +55,119 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, {I_PMATERIALS, C_PMATERIALS, 4 }, }; +// TODO: Packing? +//#pragma pack(4) struct pixel_shader_uid_data { u32 components; - DSTALPHA_MODE dstAlphaMode; // TODO: as u32 :2 - AlphaTest::TEST_RESULT Pretest; // TODO: As :2 + u32 dstAlphaMode : 2; + u32 Pretest : 2; + + u32 genMode_numtexgens : 4; + u32 genMode_numtevstages : 4; + u32 genMode_numindstages : 3; + u32 nIndirectStagesUsed : 8; - struct { - u32 numtexgens : 4; - u32 numtevstages : 4; - u32 numindstages : 3; - } genMode; - struct + + u32 texMtxInfo_n_unknown : 8; // 8x1 bit + u32 texMtxInfo_n_projection : 8; // 8x1 bit + u32 texMtxInfo_n_inputform : 16; // 8x2 bits + u32 texMtxInfo_n_texgentype : 24; // 8x3 bits + u64 texMtxInfo_n_sourcerow : 40; // 8x5 bits + u32 texMtxInfo_n_embosssourceshift : 24; // 8x3 bits + u32 texMtxInfo_n_embosslightshift : 24; // 8x3 bits + + u32 tevindref_bi0 : 3; + u32 tevindref_bc0 : 3; + u32 tevindref_bi1 : 3; + u32 tevindref_bc1 : 3; + u32 tevindref_bi2 : 3; + u32 tevindref_bc3 : 3; + u32 tevindref_bi4 : 3; + u32 tevindref_bc4 : 3; + inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap) { - u32 unknown : 1; - u32 projection : 1; // XF_TEXPROJ_X - u32 inputform : 2; // XF_TEXINPUT_X - u32 texgentype : 3; // XF_TEXGEN_X - u32 sourcerow : 5; // XF_SRCGEOM_X - u32 embosssourceshift : 3; // what generated texcoord to use - u32 embosslightshift : 3; // light index that is used - } texMtxInfo[8]; - struct + if (index == 0) { tevindref_bc0 = texcoord; 
tevindref_bi0 = texmap; } + else if (index == 1) { tevindref_bc1 = texcoord; tevindref_bi1 = texmap; } + else if (index == 2) { tevindref_bc3 = texcoord; tevindref_bi2 = texmap; } + else if (index == 3) { tevindref_bc4 = texcoord; tevindref_bi4 = texmap; } + } + inline void SetTevindrefTexmap(int index, u32 texmap) { - u32 bi0 : 3; // indirect tex stage 0 ntexmap - u32 bc0 : 3; // indirect tex stage 0 ntexcoord - u32 bi1 : 3; - u32 bc1 : 3; - u32 bi2 : 3; - u32 bc3 : 3; - u32 bi4 : 3; - u32 bc4 : 3; - inline void SetValues(int index, u32 texcoord, u32 texmap) - { - if (index == 0) { bc0 = texcoord; bi0 = texmap; } - else if (index == 1) { bc1 = texcoord; bi1 = texmap; } - else if (index == 2) { bc3 = texcoord; bi2 = texmap; } - else if (index == 3) { bc4 = texcoord; bi4 = texmap; } - } - inline void SetTexmap(int index, u32 texmap) - { - if (index == 0) { bi0 = texmap; } - else if (index == 1) { bi1 = texmap; } - else if (index == 2) { bi2 = texmap; } - else if (index == 3) { bi4 = texmap; } - } - } tevindref; + if (index == 0) { tevindref_bi0 = texmap; } + else if (index == 1) { tevindref_bi1 = texmap; } + else if (index == 2) { tevindref_bi2 = texmap; } + else if (index == 3) { tevindref_bi4 = texmap; } + } - u32 tevorders_n_texcoord1 : 24; // 8 x 3 bit - u32 tevorders_n_texcoord2 : 24; // 8 x 3 bit - struct + u64 tevorders_n_texcoord : 48; // 16 x 3 bits + + u64 tevind_n_sw : 48; // 16 x 3 bits + u64 tevind_n_tw : 48; // 16 x 3 bits + u32 tevind_n_fb_addprev : 16; // 16 x 1 bit + u32 tevind_n_bs : 32; // 16 x 2 bits + u32 tevind_n_fmt : 32; // 16 x 2 bits + u32 tevind_n_bt : 32; // 16 x 2 bits + u64 tevind_n_bias : 48; // 16 x 3 bits + u64 tevind_n_mid : 64; // 16 x 4 bits + + // NOTE: These assume that the affected bits are zero before calling + void Set_tevind_sw(int index, u64 val) { - u32 sw1 : 24; // 8 x 3 bit - u32 sw2 : 24; // 8 x 3 bit - u32 tw1 : 24; // 8 x 3 bit - u32 tw2 : 24; // 8 x 3 bit - u32 fb_addprev : 16; // 16 x 1 bit - u32 bs : 32; // 16 x 2 
bit - u32 fmt : 32; // 16 x 2 bit - u32 bt : 32; // 16 x 2 bit - u32 bias1 : 24; // 8 x 3 bit - u32 bias2 : 24; // 8 x 3 bit - u32 mid1 : 32; // 8 x 4 bit - u32 mid2 : 32; // 8 x 4 bit - - // NOTE: These assume that the affected bits are zero before calling - void Set_sw(int index, u32 val) - { - if (index < 8) sw1 |= val << (3*index); - else sw2 |= val << (3*index - 24); - } - void Set_tw(int index, u32 val) - { - if (index < 8) tw1 |= val << (3*index); - else tw2 |= val << (3*index - 24); - } - void Set_bias(int index, u32 val) - { - if (index < 8) bias1 |= val << (3*index); - else bias2 |= val << (3*index - 24); - } - void Set_mid(int index, u32 val) - { - if (index < 8) mid1 |= val << (4*index); - else mid2 |= val << (4*index - 32); - } - } tevind_n; - - struct + tevind_n_sw |= val << (3*index); + } + void Set_tevind_tw(int index, u64 val) { - u32 swap1 : 2; - u32 swap2 : 2; - u32 kcsel0 : 5; - u32 kasel0 : 5; - u32 kcsel1 : 5; - u32 kasel1 : 5; - - void set_kcsel(int i, u32 value) { if (i) kcsel1 = value; else kcsel0 = value; } - void set_kasel(int i, u32 value) { if( i) kasel1 = value; else kasel0 = value; } - } tevksel[8]; - - struct + tevind_n_tw |= val << (3*index); + } + void Set_tevind_bias(int index, u64 val) { - union { - struct //abc=8bit,d=10bit - { - u32 d : 4; // TEVSELCC_X - u32 c : 4; // TEVSELCC_X - u32 b : 4; // TEVSELCC_X - u32 a : 4; // TEVSELCC_X - - u32 bias : 2; - u32 op : 1; - u32 clamp : 1; - - u32 shift : 2; - u32 dest : 2; //1,2,3 - }; - u32 hex : 24; - } colorC; - union { - struct - { - u32 rswap : 2; - u32 tswap : 2; - u32 d : 3; // TEVSELCA_ - u32 c : 3; // TEVSELCA_ - u32 b : 3; // TEVSELCA_ - u32 a : 3; // TEVSELCA_ - - u32 bias : 2; //GXTevBias - u32 op : 1; - u32 clamp : 1; - - u32 shift : 2; - u32 dest : 2; //1,2,3 - }; - u32 hex : 24; - } alphaC; - } combiners[16]; - struct + tevind_n_bias |= val << (3*index); + } + void Set_tevind_mid(int index, u64 val) { - u32 comp0 : 3; - u32 comp1 : 3; - u32 logic : 2; - u32 
use_zcomploc_hack : 1; - } alpha_test; + tevind_n_mid |= val << (4*index); + } - union { - struct - { - u32 proj : 1; // 0 - perspective, 1 - orthographic - u32 fsel : 3; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 - backward exp, 7 - backward exp2 - u32 RangeBaseEnabled : 1; - }; - u32 hex : 4; - } fog; + u32 tevksel_n_swap1 : 16; // 8x2 bits + u32 tevksel_n_swap2 : 16; // 8x2 bits + u64 tevksel_n_kcsel0 : 40; // 8x5 bits + u64 tevksel_n_kasel0 : 40; // 8x5 bits + u64 tevksel_n_kcsel1 : 40; // 8x5 bits + u64 tevksel_n_kasel1 : 40; // 8x5 bits + void set_tevksel_kcsel(int index, int i, u32 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); } + void set_tevksel_kasel(int index, int i, u32 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); } + + u64 cc_n_d : 64; // 16x4 bits + u64 cc_n_c : 64; // 16x4 bits + u64 cc_n_b : 64; // 16x4 bits + u64 cc_n_a : 64; // 16x4 bits + u32 cc_n_bias : 32; // 16x2 bits + u32 cc_n_op : 16; // 16x1 bit + u32 cc_n_clamp : 16; // 16x1 bit + u32 cc_n_shift : 32; // 16x2 bits + u32 cc_n_dest : 32; // 16x2 bits + + u32 ac_n_rswap : 32; // 16x2 bits + u32 ac_n_tswap : 32; // 16x2 bits + u64 ac_n_d : 48; // 16x3 bits + u64 ac_n_c : 48; // 16x3 bits + u64 ac_n_b : 48; // 16x3 bits + u64 ac_n_a : 48; // 16x3 bits + u32 ac_n_bias : 32; // 16x2 bits + u32 ac_n_op : 16; // 16x1 bit + u32 ac_n_clamp : 16; // 16x1 bit + u32 ac_n_shift : 32; // 16x2 bits + u32 ac_n_dest : 32; // 16x2 bits + + u32 alpha_test_comp0 : 3; + u32 alpha_test_comp1 : 3; + u32 alpha_test_logic : 2; + u32 alpha_test_use_zcomploc_hack : 1; + + u32 fog_proj : 1; + u32 fog_fsel : 3; + u32 fog_RangeBaseEnabled : 1; u32 ztex_op : 2; @@ -221,6 +178,7 @@ struct pixel_shader_uid_data LightingUidData lighting; }; +//#pragma pack() typedef ShaderUid PixelShaderUid; typedef ShaderCode PixelShaderCode; // TODO: Obsolete diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h 
b/Source/Core/VideoCommon/Src/ShaderGenCommon.h index ebf2ae1b87..b8820e2e98 100644 --- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h @@ -199,22 +199,21 @@ static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const object.Write(";\n"); } +#pragma pack(4) /** * Common uid data used for shader generators that use lighting calculations. * Expected to be stored as a member called "lighting". */ struct LightingUidData { - struct - { - u32 matsource : 1; - u32 enablelighting : 1; - u32 ambsource : 1; - u32 diffusefunc : 2; - u32 attnfunc : 2; - u32 light_mask : 8; - } lit_chans[4]; + u32 matsource : 4; // 4x1 bit + u32 enablelighting : 4; // 4x1 bit + u32 ambsource : 4; // 4x1 bit + u32 diffusefunc : 8; // 4x2 bits + u32 attnfunc : 8; // 4x2 bits + u32 light_mask : 32; // 4x8 bits }; +#pragma pack() /** * Checks if there has been From 7e0db58b1ee2e5ac58c5fd84560506b9bb33ae8d Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Wed, 1 May 2013 11:42:00 +0200 Subject: [PATCH 47/54] Fix stuff --- Source/Core/VideoCommon/Src/BPStructs.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index 314041f894..31f873acc6 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -356,6 +356,7 @@ void BPWritten(const BPCmd& bp) PRIM_LOG("alphacmp: ref0=%d, ref1=%d, comp0=%d, comp1=%d, logic=%d", bpmem.alpha_test.ref0, bpmem.alpha_test.ref1, bpmem.alpha_test.comp0, bpmem.alpha_test.comp1, bpmem.alpha_test.logic); PixelShaderManager::SetAlpha(bpmem.alpha_test); + g_renderer->SetColorMask(); break; case BPMEM_BIAS: // BIAS PRIM_LOG("ztex bias=0x%x", bpmem.ztex1.bias); From 20719081dfe51c352b54fb82be34fc418a4f4074 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 11:37:41 +0200 Subject: [PATCH 48/54] PixelShaderGen: Move RegisterStates from global storage to stack. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index ab3df05781..2aa2468eef 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -24,11 +24,6 @@ // output is given by .outreg // tevtemp is set according to swapmodetables and -template static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType); -template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); -template static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); -template static void WriteFog(T& out, pixel_shader_uid_data& uid_data); - static const char *tevKSelTableC[] = // KCSEL { "1.0f,1.0f,1.0f", // 1 = 0x00 @@ -207,10 +202,6 @@ static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" }; static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" }; -static char swapModeTable[4][5]; - -static char text[16384]; - struct RegisterState { bool ColorNeedOverflowControl; @@ -218,7 +209,9 @@ struct RegisterState bool AuxStored; }; -static RegisterState RegisterStates[4]; +static char swapModeTable[4][5]; + +static char text[16384]; static void BuildSwapModeTable() { @@ -233,6 +226,11 @@ static void BuildSwapModeTable() } } +template static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]); +template static void SampleTexture(T& out, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); +template static void WriteAlphaTest(T& out, 
pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); +template static void WriteFog(T& out, pixel_shader_uid_data& uid_data); + template static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) { @@ -241,8 +239,6 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api pixel_shader_uid_data& uid_data = (&out.template GetUidData() != NULL) ? out.template GetUidData() : dummy_data; - ERROR_LOG(VIDEO, "%lu", sizeof(pixel_shader_uid_data)); - out.SetBuffer(text); if (out.GetBuffer() != NULL) setlocale(LC_NUMERIC, "C"); // Reset locale for compilation @@ -513,6 +509,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api } } + RegisterState RegisterStates[4]; RegisterStates[0].AlphaNeedOverflowControl = false; RegisterStates[0].ColorNeedOverflowControl = false; RegisterStates[0].AuxStored = false; @@ -526,7 +523,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api // Uid fields for BuildSwapModeTable are set in WriteStage BuildSwapModeTable(); for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, uid_data, i, ApiType); // build the equation for this stage + WriteStage(out, uid_data, i, ApiType, RegisterStates); // build the equation for this stage if (numStages) { @@ -676,7 +673,7 @@ static const char *TEVCMPAlphaOPTable[16] = template -static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType) +static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4]) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; From f0ea525d3b4dea15363674c822943ecf38da26b3 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 11:39:50 +0200 Subject: [PATCH 49/54] PixelShaderGen: Change a comment slightly. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 5ae3febfa3..2305be7c4d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -55,7 +55,7 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, {I_PMATERIALS, C_PMATERIALS, 4 }, }; -// TODO: Packing? +// TODO: Should compact packing be enabled? //#pragma pack(4) struct pixel_shader_uid_data { From c505260ec18656aa17c4ce80f5bbc72f4d10e952 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 12:20:21 +0200 Subject: [PATCH 50/54] Fix some merge conflicts. --- Source/Core/VideoCommon/Src/LightingShaderGen.h | 2 +- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.h b/Source/Core/VideoCommon/Src/LightingShaderGen.h index 36bc52749a..ace785a477 100644 --- a/Source/Core/VideoCommon/Src/LightingShaderGen.h +++ b/Source/Core/VideoCommon/Src/LightingShaderGen.h @@ -251,7 +251,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com GenerateLightShader(object, uid_data, i, lit_index, lightsName, coloralpha); } } - object.Write("%s%d = mat * saturate(lacc);\n", dest, j); + object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j); object.Write("}\n"); } } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 9b8f3a9426..83b61cb164 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -945,7 +945,7 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) { - out.Write("cc1 
= frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); + out.Write("cc1 = %s(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); RegisterStates[2].AlphaNeedOverflowControl = false; RegisterStates[2].ColorNeedOverflowControl = false; } From 7a01ceba731ba68baed391e4306ccd3c1dd11564 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 12:51:57 +0200 Subject: [PATCH 51/54] PixelShaderGen: Fix various merge conflicts and a critical regression from revision 32b78a85. --- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 47 ++++++++++--------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 1 + 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 83b61cb164..36a6dc64e5 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -269,7 +269,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; - bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; + const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; const bool bOpenGL = ApiType == API_OPENGL; out.Write("//Pixel Shader for TEV stages\n"); @@ -288,7 +288,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api out.Write("float fmod( float x, float y )\n"); out.Write("{\n"); out.Write("\tfloat z = fract( abs( x / y) ) * abs( y );\n"); - out.Write("\treturn (x < 0) ? -z : z;\n"); + out.Write("\treturn (x < 0.0) ? 
-z : z;\n"); out.Write("}\n"); // Declare samplers @@ -587,10 +587,11 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either - bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; + const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; uid_data.ztex_op = bpmem.ztex2.op; uid_data.per_pixel_depth = per_pixel_depth; + uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc; uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; // Note: z-textures are not written to depth buffer if early depth test is used @@ -810,26 +811,26 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - uid_data.cc_n_d = cc.d; - uid_data.cc_n_c = cc.c; - uid_data.cc_n_b = cc.b; - uid_data.cc_n_a = cc.a; - uid_data.cc_n_bias = cc.bias; - uid_data.cc_n_op = cc.op; - uid_data.cc_n_clamp = cc.clamp; - uid_data.cc_n_shift = cc.shift; - uid_data.cc_n_dest = cc.dest; - uid_data.ac_n_rswap = ac.rswap; - uid_data.ac_n_tswap = ac.tswap; - uid_data.ac_n_d = ac.d; - uid_data.ac_n_c = ac.c; - uid_data.ac_n_b = ac.b; - uid_data.ac_n_a = ac.a; - uid_data.ac_n_bias = ac.bias; - uid_data.ac_n_op = ac.op; - uid_data.ac_n_clamp = ac.clamp; - uid_data.ac_n_shift = ac.shift; - uid_data.ac_n_dest = ac.dest; + uid_data.cc_n_d = cc.d << (4*n); + uid_data.cc_n_c = cc.c << (4*n); + uid_data.cc_n_b = cc.b << (4*n); + uid_data.cc_n_a = cc.a << (4*n); + uid_data.cc_n_bias = cc.bias << (2*n); + uid_data.cc_n_op = cc.op << n; + uid_data.cc_n_clamp = cc.clamp << n; + uid_data.cc_n_shift = cc.shift << (2*n); + uid_data.cc_n_dest = cc.dest << (2*n); + uid_data.ac_n_rswap = ac.rswap << (2*n); + uid_data.ac_n_tswap = ac.tswap << (2*n); + uid_data.ac_n_d = ac.d << (3*n); + 
uid_data.ac_n_c = ac.c << (3*n); + uid_data.ac_n_b = ac.b << (3*n); + uid_data.ac_n_a = ac.a << (3*n); + uid_data.ac_n_bias = ac.bias << (2*n); + uid_data.ac_n_op = ac.op << n; + uid_data.ac_n_clamp = ac.clamp << n; + uid_data.ac_n_shift = ac.shift << (2*n); + uid_data.ac_n_dest = ac.dest << (2*n); if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 2305be7c4d..58e451d37d 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -171,6 +171,7 @@ struct pixel_shader_uid_data u32 ztex_op : 2; + u32 fast_depth_calc : 1; u32 per_pixel_depth : 1; u32 bHasIndStage : 16; From 2b2ca5260f0e28e115a3f835c4e83e5714b41e99 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 13:03:38 +0200 Subject: [PATCH 52/54] PixelShaderGen: Fixed some bugs, removed unused shader uid fields. 
--- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 18 +++++++++--------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 10 ++-------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 36a6dc64e5..93e55e9353 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -720,7 +720,7 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE out.Write("// TEV stage %d\n", n); uid_data.bHasIndStage |= bHasIndStage << n; - uid_data.tevorders_n_texcoord |= texcoord << (3 * n); + uid_data.tevorders_n_texcoord |= (u64)texcoord << (3 * n); if (bHasIndStage) { uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n); @@ -811,10 +811,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - uid_data.cc_n_d = cc.d << (4*n); - uid_data.cc_n_c = cc.c << (4*n); - uid_data.cc_n_b = cc.b << (4*n); - uid_data.cc_n_a = cc.a << (4*n); + uid_data.cc_n_d = (u64)cc.d << (4*n); + uid_data.cc_n_c = (u64)cc.c << (4*n); + uid_data.cc_n_b = (u64)cc.b << (4*n); + uid_data.cc_n_a = (u64)cc.a << (4*n); uid_data.cc_n_bias = cc.bias << (2*n); uid_data.cc_n_op = cc.op << n; uid_data.cc_n_clamp = cc.clamp << n; @@ -822,10 +822,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE uid_data.cc_n_dest = cc.dest << (2*n); uid_data.ac_n_rswap = ac.rswap << (2*n); uid_data.ac_n_tswap = ac.tswap << (2*n); - uid_data.ac_n_d = ac.d << (3*n); - uid_data.ac_n_c = ac.c << (3*n); - uid_data.ac_n_b = ac.b << (3*n); - uid_data.ac_n_a = ac.a << (3*n); + uid_data.ac_n_d = (u64)ac.d << (3*n); + uid_data.ac_n_c = (u64)ac.c << (3*n); + uid_data.ac_n_b = (u64)ac.b << (3*n); + uid_data.ac_n_a = (u64)ac.a << (3*n); uid_data.ac_n_bias = ac.bias 
<< (2*n); uid_data.ac_n_op = ac.op << n; uid_data.ac_n_clamp = ac.clamp << n; diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 58e451d37d..a3ebdab3c1 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -69,13 +69,7 @@ struct pixel_shader_uid_data u32 nIndirectStagesUsed : 8; - u32 texMtxInfo_n_unknown : 8; // 8x1 bit u32 texMtxInfo_n_projection : 8; // 8x1 bit - u32 texMtxInfo_n_inputform : 16; // 8x2 bits - u32 texMtxInfo_n_texgentype : 24; // 8x3 bits - u64 texMtxInfo_n_sourcerow : 40; // 8x5 bits - u32 texMtxInfo_n_embosssourceshift : 24; // 8x3 bits - u32 texMtxInfo_n_embosslightshift : 24; // 8x3 bits u32 tevindref_bi0 : 3; u32 tevindref_bc0 : 3; @@ -135,8 +129,8 @@ struct pixel_shader_uid_data u64 tevksel_n_kasel0 : 40; // 8x5 bits u64 tevksel_n_kcsel1 : 40; // 8x5 bits u64 tevksel_n_kasel1 : 40; // 8x5 bits - void set_tevksel_kcsel(int index, int i, u32 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); } - void set_tevksel_kasel(int index, int i, u32 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); } + void set_tevksel_kcsel(int index, int i, u64 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); } + void set_tevksel_kasel(int index, int i, u64 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); } u64 cc_n_d : 64; // 16x4 bits u64 cc_n_c : 64; // 16x4 bits From f724b47f45cc737296edf1e042e22f060daf3b66 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 13:08:38 +0200 Subject: [PATCH 53/54] PixelShaderGen: Fix more critical issues, some of which are regressions of revision 32b78a85. 
--- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 93e55e9353..70931a9261 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -811,26 +811,26 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; - uid_data.cc_n_d = (u64)cc.d << (4*n); - uid_data.cc_n_c = (u64)cc.c << (4*n); - uid_data.cc_n_b = (u64)cc.b << (4*n); - uid_data.cc_n_a = (u64)cc.a << (4*n); - uid_data.cc_n_bias = cc.bias << (2*n); - uid_data.cc_n_op = cc.op << n; - uid_data.cc_n_clamp = cc.clamp << n; - uid_data.cc_n_shift = cc.shift << (2*n); - uid_data.cc_n_dest = cc.dest << (2*n); - uid_data.ac_n_rswap = ac.rswap << (2*n); - uid_data.ac_n_tswap = ac.tswap << (2*n); - uid_data.ac_n_d = (u64)ac.d << (3*n); - uid_data.ac_n_c = (u64)ac.c << (3*n); - uid_data.ac_n_b = (u64)ac.b << (3*n); - uid_data.ac_n_a = (u64)ac.a << (3*n); - uid_data.ac_n_bias = ac.bias << (2*n); - uid_data.ac_n_op = ac.op << n; - uid_data.ac_n_clamp = ac.clamp << n; - uid_data.ac_n_shift = ac.shift << (2*n); - uid_data.ac_n_dest = ac.dest << (2*n); + uid_data.cc_n_d |= (u64)cc.d << (4*n); + uid_data.cc_n_c |= (u64)cc.c << (4*n); + uid_data.cc_n_b |= (u64)cc.b << (4*n); + uid_data.cc_n_a |= (u64)cc.a << (4*n); + uid_data.cc_n_bias |= cc.bias << (2*n); + uid_data.cc_n_op |= cc.op << n; + uid_data.cc_n_clamp |= cc.clamp << n; + uid_data.cc_n_shift |= cc.shift << (2*n); + uid_data.cc_n_dest |= cc.dest << (2*n); + uid_data.ac_n_rswap |= ac.rswap << (2*n); + uid_data.ac_n_tswap |= ac.tswap << (2*n); + uid_data.ac_n_d |= (u64)ac.d << (3*n); + uid_data.ac_n_c |= (u64)ac.c << (3*n); + uid_data.ac_n_b |= (u64)ac.b << (3*n); + 
uid_data.ac_n_a |= (u64)ac.a << (3*n); + uid_data.ac_n_bias |= ac.bias << (2*n); + uid_data.ac_n_op |= ac.op << n; + uid_data.ac_n_clamp |= ac.clamp << n; + uid_data.ac_n_shift |= ac.shift << (2*n); + uid_data.ac_n_dest |= ac.dest << (2*n); if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC @@ -840,10 +840,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) { const int i = bpmem.combiners[n].alphaC.rswap; - uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); - uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); - uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); - uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); @@ -863,10 +863,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE } const int i = bpmem.combiners[n].alphaC.tswap; - uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); - uid_data.tevksel_n_swap1 = bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); - uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); - uid_data.tevksel_n_swap2 = bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2 ].swap1 << (2 * (i*2 )); + uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1)); + uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2 ].swap2 << (2 * (i*2 )); + 
uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1)); char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texmap = bpmem.tevorders[n/2].getTexMap(n&1); From 88163691443af7032b55e83ad78fe946f124b25f Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 17 Jun 2013 13:17:25 +0200 Subject: [PATCH 54/54] PixelShaderGen: Add some TODOs. Totally the perfect commit message for a final commit before merging :p --- Source/Core/VideoCommon/Src/PixelShaderGen.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index a3ebdab3c1..826ec561d0 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -59,6 +59,8 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, //#pragma pack(4) struct pixel_shader_uid_data { + // TODO: Optimize field order for easy access! + u32 components; u32 dstAlphaMode : 2; u32 Pretest : 2; @@ -171,6 +173,7 @@ struct pixel_shader_uid_data u32 xfregs_numTexGen_numTexGens : 4; + // TODO: I think we're fine without an enablePixelLighting field, should probably double check, though.. LightingUidData lighting; }; //#pragma pack()