diff --git a/Source/Core/Common/Src/LinearDiskCache.h b/Source/Core/Common/Src/LinearDiskCache.h index bb6e21197e..93f3da981e 100644 --- a/Source/Core/Common/Src/LinearDiskCache.h +++ b/Source/Core/Common/Src/LinearDiskCache.h @@ -26,7 +26,7 @@ // shader cache for every revision, graphics-related or not, which is simply annoying. enum { - LINEAR_DISKCACHE_VER = 6592 + LINEAR_DISKCACHE_VER = 6618 }; // On disk format: diff --git a/Source/Core/VideoCommon/Src/Debugger.cpp b/Source/Core/VideoCommon/Src/Debugger.cpp index 94531e46f0..09ac246e70 100644 --- a/Source/Core/VideoCommon/Src/Debugger.cpp +++ b/Source/Core/VideoCommon/Src/Debugger.cpp @@ -101,21 +101,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path) if (!useDstAlpha) { output = "Destination alpha disabled:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); + output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components); } else { if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend) { output = "Using dual source blending for destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); + output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components); } else { output = "Using two passes for emulating destination alpha:\n"; - output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); + output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components); output += "\n\nDestination alpha pass shader:\n"; - output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); + output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components); } } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 57ec4cd2af..0752fa73be 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -160,7 +160,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode) // output is given by .outreg // tevtemp is set according to swapmodetables and -static void WriteStage(char *&p, int n, API_TYPE ApiType); +static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms); static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); // static void WriteAlphaCompare(char *&p, int num, int comp); static bool WriteAlphaTest(char *&p, API_TYPE ApiType); @@ -442,7 +442,7 @@ char *GeneratePixelLightShader(char *p, int index, const LitChannel& chan, const -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 components) +const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 maxUniforms, u32 components) { setlocale(LC_NUMERIC, "C"); // Reset locale for compilation text[sizeof(text) - 1] = 0x7C; // canary @@ -504,10 +504,13 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA); WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS); WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS); - WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); - WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); - WRITE(p, "uniform float4 "I_FOG"[2] : register(c%d);\n", C_FOG); - if(g_ActiveConfig.bEnablePixelLigting) + if(C_INDTEXSCALE + 2 <= maxUniforms) + WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE); + if(C_INDTEXMTX + 6 <= maxUniforms) + WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX); + if(C_FOG + 2 <= maxUniforms) + WRITE(p, "uniform float4 "I_FOG"[2] : register(c%d);\n", C_FOG); + if(g_ActiveConfig.bEnablePixelLigting && C_PLIGHTS + 40 <= maxUniforms && C_PMATERIALS + 4 <= maxUniforms) { WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n"); WRITE(p,"typedef struct { Light lights[8]; } s_"I_PLIGHTS";\n"); @@ -588,7 +591,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType " float2 wrappedcoord, tempcoord;\n" " float4 cc0, cc1, cc2, cprev,crastemp,ckonsttemp;\n\n"); - if(g_ActiveConfig.bEnablePixelLigting) + if(g_ActiveConfig.bEnablePixelLigting && C_PLIGHTS + 40 <= maxUniforms && C_PMATERIALS + 4 <= maxUniforms) { if (xfregs.numTexGens < 7) { @@ -748,7 +751,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType { int texcoord = bpmem.tevindref.getTexCoord(i); - if (texcoord < numTexgen) + if (texcoord < numTexgen && C_INDTEXSCALE + 2 <= maxUniforms) WRITE(p, "tempcoord = uv%d.xy * "I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy"); else WRITE(p, "tempcoord = float2(0.0f, 0.0f);\n"); @@ -770,7 +773,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType } for (int i = 0; i < numStages; i++) - WriteStage(p, i, ApiType); //build the equation for this stage + WriteStage(p, i, ApiType,maxUniforms); //build the equation for this stage if(numStages) { @@ -832,7 +835,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WRITE(p, " ocol0 = float4(prev.rgb, "I_ALPHA"[0].a);\n"); else { - WriteFog(p); + if(C_FOG + 2 <= maxUniforms) + WriteFog(p); WRITE(p, " ocol0 = prev;\n"); } @@ -900,7 +904,7 @@ static const char *TEVCMPAlphaOPTable[16] = }; -static void WriteStage(char *&p, int n, API_TYPE ApiType) +static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms) { char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; @@ -934,18 +938,18 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType) // multiply by offset matrix and scale if (bpmem.tevind[n].mid != 0) { - if (bpmem.tevind[n].mid <= 3) + if (bpmem.tevind[n].mid <= 3 && C_INDTEXMTX + 6 <= maxUniforms) { int mtxidx = 2*(bpmem.tevind[n].mid-1); WRITE(p, "float2 indtevtrans%d = float2(dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", n, mtxidx, n, mtxidx+1, n); } - else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) + else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord && C_INDTEXMTX + 6 <= maxUniforms) { // s matrix int mtxidx = 2*(bpmem.tevind[n].mid-5); WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); } - else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) + else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord && C_INDTEXMTX + 6 <= maxUniforms) { // t matrix int mtxidx = 2*(bpmem.tevind[n].mid-9); WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index baf9db8aa6..9eec0b6dd2 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -112,7 +112,7 @@ enum DSTALPHA_MODE DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending }; -const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 components); +const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 maxUniforms, u32 components); void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode); extern PIXELSHADERUID last_pixel_shader_uid; diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 76bc2e7be6..1e3ed1d321 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -215,7 +215,7 @@ void PixelShaderManager::SetConstants() s_bFogParamChanged = false; } - if (nLightsChanged[0] >= 0) + if (g_ActiveConfig.bEnablePixelLigting && nLightsChanged[0] >= 0) // config check added because the code in here was crashing for me inside SetPSConstant4f { // lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats int istart = nLightsChanged[0] / 0x10; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp index 0799e4e70f..5f5193f45b 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp @@ -353,7 +353,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) } // Need to compile a new shader - const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components); + const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, 65536, components); D3DBlob* pbytecode; if (!D3D::CompilePixelShader(code, strlen(code), &pbytecode)) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index 6288f8c268..7eaa51bbfa 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -44,25 +44,29 @@ static LinearDiskCache g_ps_disk_cache; static std::set unique_shaders; #define MAX_SSAA_SHADERS 3 +enum +{ + COPY_TYPE_DIRECT, + COPY_TYPE_MATRIXCOLOR, + NUM_COPY_TYPES +}; -static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[MAX_SSAA_SHADERS]; -static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[MAX_SSAA_SHADERS]; -static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[MAX_SSAA_SHADERS]; +static LPDIRECT3DPIXELSHADER9 s_CopyProgram[NUM_COPY_TYPES][PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES][MAX_SSAA_SHADERS]; static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0; LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode) { - return s_ColorMatrixProgram[SSAAMode % MAX_SSAA_SHADERS]; + return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS]; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode) +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode, int depthConversionType) { - return s_DepthMatrixProgram[SSAAMode % MAX_SSAA_SHADERS]; + return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][depthConversionType % NUM_DEPTH_CONVERSION_TYPES][SSAAMode % MAX_SSAA_SHADERS]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode) { - return s_ColorCopyProgram[SSAAMode % MAX_SSAA_SHADERS]; + return s_CopyProgram[COPY_TYPE_DIRECT][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() @@ -95,132 +99,120 @@ public: } }; +#define WRITE p+=sprintf + +static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConversionType, int SSAAMode) +{ + //Used for Copy/resolve the color buffer + //Color conversion Programs + //Depth copy programs + // this should create the same shaders as before (plus some extras added for DF16), just... more manageably than listing the full program for each combination + char text[3072]; + + setlocale(LC_NUMERIC, "C"); // Reset locale for compilation + text[sizeof(text) - 1] = 0x7C; // canary + + char* p = text; + WRITE(p, "// Copy/Color Matrix/Depth Matrix shader (matrix=%d, depth=%d, ssaa=%d)\n", copyMatrixType, depthConversionType, SSAAMode); + + WRITE(p, "uniform sampler samp0 : register(s0);\n"); + if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) + WRITE(p, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX); + WRITE(p, "void main(\n" + "out float4 ocol0 : COLOR0,\n"); + + switch(SSAAMode % MAX_SSAA_SHADERS) + { + case 0: // 1 Sample + WRITE(p, "in float2 uv0 : TEXCOORD0){\n" + "float4 texcol = tex2D(samp0,uv0);\n"); + break; + case 1: // 1 Samples SSAA + WRITE(p, "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1){\n" + "float4 texcol = tex2D(samp0,uv0.xy);\n"); + break; + case 2: // 4 Samples SSAA + WRITE(p, "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3){\n" + "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"); + break; + } + + switch(depthConversionType % PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES) + { + case PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE: + break; + case PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT: + // this is probably wrong. but it works better than the 24-bit conversion we used to generate in this case. + WRITE(p, "float4 EncodedDepth = frac((texcol.r * (65535.0f/65536.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (65536.0f/65535.0f)),1.0f);\n"); + break; + case PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT: + WRITE(p, "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"); + break; + } + + if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) + WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"); + else + WRITE(p, "ocol0 = texcol;\n"); + + WRITE(p, "}\n"); + if (text[sizeof(text) - 1] != 0x7C) + PanicAlert("PixelShaderCache copy shader generator - buffer too small, canary has been eaten!"); + + setlocale(LC_NUMERIC, ""); // restore locale + return D3D::CompileAndCreatePixelShader(text, (int)strlen(text)); +} + void PixelShaderCache::Init() { //program used for clear screen - char pprog[3072]; - sprintf(pprog, "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float4 incol0 : COLOR0){\n" - "ocol0 = incol0;\n" - "}\n"); - s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + { + char pprog[3072]; + sprintf(pprog, "void main(\n" + "out float4 ocol0 : COLOR0,\n" + " in float4 incol0 : COLOR0){\n" + "ocol0 = incol0;\n" + "}\n"); + s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + } - //Used for Copy/resolve the color buffer - //1 Sample - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float2 uv0 : TEXCOORD0){\n" - "ocol0 = tex2D(samp0,uv0);\n" - "}\n"); - s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF); + int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536); + bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants); - //1 Samples SSAA - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1){\n" - "ocol0 = tex2D(samp0,uv0.xy);\n" - "}\n"); - s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //4 Samples SSAA - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3){\n" - "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25;\n" - "}\n"); - s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - - - //Color conversion Programs - //1 sample - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float2 uv0 : TEXCOORD0){\n" - "float4 texcol = tex2D(samp0,uv0);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //1 samples SSAA - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //4 samples SSAA - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3){\n" - "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //Depth copy programs - //1 sample - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float2 uv0 : TEXCOORD0){\n" - "float4 texcol = tex2D(samp0,uv0);\n" - "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" - "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //1 sample SSAA - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n" - "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" - "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - //4 sample SSAA - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0,\n" - "in float4 uv1 : TEXCOORD1,\n" - "in float4 uv2 : TEXCOORD2,\n" - "in float4 uv3 : TEXCOORD3){\n" - "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n" - "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" - "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + // other screen copy/convert programs + for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) + { + for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++) + { + for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++) + { + if(ssaaMode && !s_CopyProgram[copyMatrixType][depthType][ssaaMode-1] + || depthType && !s_CopyProgram[copyMatrixType][depthType-1][ssaaMode] + || copyMatrixType && !s_CopyProgram[copyMatrixType-1][depthType][ssaaMode]) + { + // if it failed at a lower setting, it's going to fail here for the same reason it did there, + // so skip this attempt to avoid duplicate error messages. + s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL; + } + else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix) + { + // color matrix not supported, so substitute the nearest equivalent program that doesn't use it. + s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode]; + } + else + { + s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode); + } + } + } + } Clear(); @@ -248,15 +240,18 @@ void PixelShaderCache::Clear() void PixelShaderCache::Shutdown() { - for(int i = 0;i < MAX_SSAA_SHADERS; i++) - { - if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release(); - s_ColorMatrixProgram[i] = NULL; - if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release(); - s_ColorCopyProgram[i] = NULL; - if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release(); - s_DepthMatrixProgram[i] = NULL; - } + for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) + for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++) + for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++) + if(s_CopyProgram[copyMatrixType][depthType][ssaaMode] + && (copyMatrixType == 0 || s_CopyProgram[copyMatrixType][depthType][ssaaMode] != s_CopyProgram[copyMatrixType-1][depthType][ssaaMode])) + s_CopyProgram[copyMatrixType][depthType][ssaaMode]->Release(); + + for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) + for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++) + for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++) + s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL; + if (s_ClearProgram) s_ClearProgram->Release(); s_ClearProgram = NULL; @@ -296,8 +291,11 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) return (entry.shader != NULL); } + int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF); + int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536); + // Need to compile a new shader - const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, components); + const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, maxConstants, components); u32 code_hash = HashAdler32((const u8 *)code, strlen(code)); unique_shaders.insert(code_hash); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index cc27585d62..7568d76fa6 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -62,7 +62,14 @@ public: static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode); - static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode); + enum + { + DEPTH_CONVERSION_TYPE_NONE, + DEPTH_CONVERSION_TYPE_16BIT, + DEPTH_CONVERSION_TYPE_24BIT, + NUM_DEPTH_CONVERSION_TYPES + }; + static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, int depthConversionType); static LPDIRECT3DPIXELSHADER9 GetClearProgram(); }; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 3703362d68..f769f3315f 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -589,13 +589,22 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + D3DFORMAT bformat = FramebufferManager::GetEFBDepthRTSurfaceFormat(); + int depthConversionType; + if(bformat == FOURCC_RAWZ) + depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE; + else if(bformat == FOURCC_DF16) + depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT; + else + depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT; + D3D::drawShadedTexQuad( read_texture, &RectToLock, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), 4, 4, - (FramebufferManager::GetEFBDepthRTSurfaceFormat() == FOURCC_RAWZ) ? PixelShaderCache::GetColorMatrixProgram(0) : PixelShaderCache::GetDepthMatrixProgram(0), + PixelShaderCache::GetDepthMatrixProgram(0, depthConversionType), VertexShaderCache::GetSimpleVertexShader(0)); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index aa215ae4cc..627e6e0f4b 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -131,12 +131,18 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB D3DFORMAT bformat = FramebufferManager::GetEFBDepthRTSurfaceFormat(); int SSAAMode = g_ActiveConfig.iMultisampleMode; + int depthConversionType; + if(bformat == FOURCC_RAWZ || bformat == D3DFMT_D24X8 || !bFromZBuffer) + depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE; + else if(bformat == FOURCC_DF16) + depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT; + else + depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT; + D3D::drawShadedTexQuad(read_texture, &sourcerect, Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), virtualW, virtualH, - ((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer) ? - PixelShaderCache::GetDepthMatrixProgram(SSAAMode) : - PixelShaderCache::GetColorMatrixProgram(SSAAMode), + PixelShaderCache::GetDepthMatrixProgram(SSAAMode, depthConversionType), VertexShaderCache::GetSimpleVertexShader(SSAAMode)); Rendersurf->Release(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp index b734e21c91..3d1808c6e1 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp @@ -52,6 +52,7 @@ static LPDIRECT3DPIXELSHADER9 s_yuyvToRgbProgram = NULL; // Not all slots are taken - but who cares. const u32 NUM_ENCODING_PROGRAMS = 64; static LPDIRECT3DPIXELSHADER9 s_encodingPrograms[NUM_ENCODING_PROGRAMS]; +static bool s_encodingProgramsFailed[NUM_ENCODING_PROGRAMS]; void CreateRgbToYuyvProgram() { @@ -121,6 +122,13 @@ LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format) if (!s_encodingPrograms[format]) { + if(s_encodingProgramsFailed[format]) + { + // we already failed to create a shader for this format, + // so instead of re-trying and showing the same error message every frame, just return. + return NULL; + } + const char* shader = TextureConversionShader::GenerateEncodingShader(format,API_D3D9); #if defined(_DEBUG) || defined(DEBUGFAST) @@ -135,6 +143,7 @@ LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format) s_encodingPrograms[format] = D3D::CompileAndCreatePixelShader(shader, (int)strlen(shader)); if (!s_encodingPrograms[format]) { ERROR_LOG(VIDEO, "Failed to create encoding fragment program"); + s_encodingProgramsFailed[format] = true; } } return s_encodingPrograms[format]; @@ -145,6 +154,7 @@ void Init() for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++) { s_encodingPrograms[i] = NULL; + s_encodingProgramsFailed[i] = false; } for (unsigned int i = 0; i < NUM_TRANSFORM_BUFFERS; i++) { diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index 15b5668df0..a24fc93893 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -219,7 +219,7 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp PSCacheEntry& newentry = PixelShaders[uid]; newentry.frameCount = frameCount; pShaderLast = &newentry.shader; - const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components); + const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, 65536, components); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {