reduced frequency of dx9 ps_2_0 pixel generation errors, and made dx9 efb depth peek of 16-bit depth buffer not use 24-bit adjustment factor. shouldn't affect other the plugins.

(probably nobody else cares, but I need at least one video plugin that actually works on this computer)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6618 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nitsuja- 2010-12-19 20:59:23 +00:00
parent 47454a4ed9
commit 98fe8437ae
12 changed files with 200 additions and 166 deletions

View File

@ -26,7 +26,7 @@
// shader cache for every revision, graphics-related or not, which is simply annoying.
enum
{
LINEAR_DISKCACHE_VER = 6592
LINEAR_DISKCACHE_VER = 6618
};
// On disk format:

View File

@ -101,21 +101,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path)
if (!useDstAlpha)
{
output = "Destination alpha disabled:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
}
else
{
if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
output = "Using dual source blending for destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
}
else
{
output = "Using two passes for emulating destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
output += "\n\nDestination alpha pass shader:\n";
output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, 65536, g_nativeVertexFmt->m_components);
}
}

View File

@ -160,7 +160,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
// output is given by .outreg
// tevtemp is set according to swapmodetables and
static void WriteStage(char *&p, int n, API_TYPE ApiType);
static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms);
static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
// static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, API_TYPE ApiType);
@ -442,7 +442,7 @@ char *GeneratePixelLightShader(char *p, int index, const LitChannel& chan, const
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 components)
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 maxUniforms, u32 components)
{
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
@ -504,10 +504,13 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
WRITE(p, "uniform float4 "I_ALPHA"[1] : register(c%d);\n", C_ALPHA);
WRITE(p, "uniform float4 "I_TEXDIMS"[8] : register(c%d);\n", C_TEXDIMS);
WRITE(p, "uniform float4 "I_ZBIAS"[2] : register(c%d);\n", C_ZBIAS);
if(C_INDTEXSCALE + 2 <= maxUniforms)
WRITE(p, "uniform float4 "I_INDTEXSCALE"[2] : register(c%d);\n", C_INDTEXSCALE);
if(C_INDTEXMTX + 6 <= maxUniforms)
WRITE(p, "uniform float4 "I_INDTEXMTX"[6] : register(c%d);\n", C_INDTEXMTX);
if(C_FOG + 2 <= maxUniforms)
WRITE(p, "uniform float4 "I_FOG"[2] : register(c%d);\n", C_FOG);
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && C_PLIGHTS + 40 <= maxUniforms && C_PMATERIALS + 4 <= maxUniforms)
{
WRITE(p,"typedef struct { float4 col; float4 cosatt; float4 distatt; float4 pos; float4 dir; } Light;\n");
WRITE(p,"typedef struct { Light lights[8]; } s_"I_PLIGHTS";\n");
@ -588,7 +591,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
" float2 wrappedcoord, tempcoord;\n"
" float4 cc0, cc1, cc2, cprev,crastemp,ckonsttemp;\n\n");
if(g_ActiveConfig.bEnablePixelLigting)
if(g_ActiveConfig.bEnablePixelLigting && C_PLIGHTS + 40 <= maxUniforms && C_PMATERIALS + 4 <= maxUniforms)
{
if (xfregs.numTexGens < 7)
{
@ -748,7 +751,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{
int texcoord = bpmem.tevindref.getTexCoord(i);
if (texcoord < numTexgen)
if (texcoord < numTexgen && C_INDTEXSCALE + 2 <= maxUniforms)
WRITE(p, "tempcoord = uv%d.xy * "I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
else
WRITE(p, "tempcoord = float2(0.0f, 0.0f);\n");
@ -770,7 +773,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
}
for (int i = 0; i < numStages; i++)
WriteStage(p, i, ApiType); //build the equation for this stage
WriteStage(p, i, ApiType,maxUniforms); //build the equation for this stage
if(numStages)
{
@ -832,6 +835,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
WRITE(p, " ocol0 = float4(prev.rgb, "I_ALPHA"[0].a);\n");
else
{
if(C_FOG + 2 <= maxUniforms)
WriteFog(p);
WRITE(p, " ocol0 = prev;\n");
}
@ -900,7 +904,7 @@ static const char *TEVCMPAlphaOPTable[16] =
};
static void WriteStage(char *&p, int n, API_TYPE ApiType)
static void WriteStage(char *&p, int n, API_TYPE ApiType,int maxUniforms)
{
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
@ -934,18 +938,18 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
// multiply by offset matrix and scale
if (bpmem.tevind[n].mid != 0)
{
if (bpmem.tevind[n].mid <= 3)
if (bpmem.tevind[n].mid <= 3 && C_INDTEXMTX + 6 <= maxUniforms)
{
int mtxidx = 2*(bpmem.tevind[n].mid-1);
WRITE(p, "float2 indtevtrans%d = float2(dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot("I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n",
n, mtxidx, n, mtxidx+1, n);
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord && C_INDTEXMTX + 6 <= maxUniforms)
{ // s matrix
int mtxidx = 2*(bpmem.tevind[n].mid-5);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord && C_INDTEXMTX + 6 <= maxUniforms)
{ // t matrix
int mtxidx = 2*(bpmem.tevind[n].mid-9);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);

View File

@ -112,7 +112,7 @@ enum DSTALPHA_MODE
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 components);
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,u32 maxUniforms, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode);
extern PIXELSHADERUID last_pixel_shader_uid;

View File

@ -215,7 +215,7 @@ void PixelShaderManager::SetConstants()
s_bFogParamChanged = false;
}
if (nLightsChanged[0] >= 0)
if (g_ActiveConfig.bEnablePixelLigting && nLightsChanged[0] >= 0) // config check added because the code in here was crashing for me inside SetPSConstant4f
{
// lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats
int istart = nLightsChanged[0] / 0x10;

View File

@ -353,7 +353,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
}
// Need to compile a new shader
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components);
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, 65536, components);
D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code, strlen(code), &pbytecode))

View File

@ -44,25 +44,29 @@ static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders;
#define MAX_SSAA_SHADERS 3
enum
{
COPY_TYPE_DIRECT,
COPY_TYPE_MATRIXCOLOR,
NUM_COPY_TYPES
};
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_CopyProgram[NUM_COPY_TYPES][PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES][MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
{
return s_ColorMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS];
}
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode)
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode, int depthConversionType)
{
return s_DepthMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][depthConversionType % NUM_DEPTH_CONVERSION_TYPES][SSAAMode % MAX_SSAA_SHADERS];
}
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
{
return s_ColorCopyProgram[SSAAMode % MAX_SSAA_SHADERS];
return s_CopyProgram[COPY_TYPE_DIRECT][DEPTH_CONVERSION_TYPE_NONE][SSAAMode % MAX_SSAA_SHADERS];
}
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
@ -95,9 +99,80 @@ public:
}
};
#define WRITE p+=sprintf
static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConversionType, int SSAAMode)
{
//Used for Copy/resolve the color buffer
//Color conversion Programs
//Depth copy programs
// this should create the same shaders as before (plus some extras added for DF16), just... more manageably than listing the full program for each combination
char text[3072];
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary
char* p = text;
WRITE(p, "// Copy/Color Matrix/Depth Matrix shader (matrix=%d, depth=%d, ssaa=%d)\n", copyMatrixType, depthConversionType, SSAAMode);
WRITE(p, "uniform sampler samp0 : register(s0);\n");
if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
WRITE(p, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX);
WRITE(p, "void main(\n"
"out float4 ocol0 : COLOR0,\n");
switch(SSAAMode % MAX_SSAA_SHADERS)
{
case 0: // 1 Sample
WRITE(p, "in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n");
break;
case 1: // 1 Samples SSAA
WRITE(p, "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n");
break;
case 2: // 4 Samples SSAA
WRITE(p, "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n");
break;
}
switch(depthConversionType % PixelShaderCache::NUM_DEPTH_CONVERSION_TYPES)
{
case PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE:
break;
case PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT:
// this is probably wrong. but it works better than the 24-bit conversion we used to generate in this case.
WRITE(p, "float4 EncodedDepth = frac((texcol.r * (65535.0f/65536.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (65536.0f/65535.0f)),1.0f);\n");
break;
case PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT:
WRITE(p, "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n");
break;
}
if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n");
else
WRITE(p, "ocol0 = texcol;\n");
WRITE(p, "}\n");
if (text[sizeof(text) - 1] != 0x7C)
PanicAlert("PixelShaderCache copy shader generator - buffer too small, canary has been eaten!");
setlocale(LC_NUMERIC, ""); // restore locale
return D3D::CompileAndCreatePixelShader(text, (int)strlen(text));
}
void PixelShaderCache::Init()
{
//program used for clear screen
{
char pprog[3072];
sprintf(pprog, "void main(\n"
"out float4 ocol0 : COLOR0,\n"
@ -105,122 +180,39 @@ void PixelShaderCache::Init()
"ocol0 = incol0;\n"
"}\n");
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
}
//Used for Copy/resolve the color buffer
//1 Sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float2 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0);\n"
"}\n");
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants);
//1 Samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"ocol0 = tex2D(samp0,uv0.xy);\n"
"}\n");
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 Samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25;\n"
"}\n");
s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Color conversion Programs
//1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//1 samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Depth copy programs
//1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//1 sample SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 sample SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3){\n"
"float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX);
s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
// other screen copy/convert programs
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
{
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
{
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
{
if(ssaaMode && !s_CopyProgram[copyMatrixType][depthType][ssaaMode-1]
|| depthType && !s_CopyProgram[copyMatrixType][depthType-1][ssaaMode]
|| copyMatrixType && !s_CopyProgram[copyMatrixType-1][depthType][ssaaMode])
{
// if it failed at a lower setting, it's going to fail here for the same reason it did there,
// so skip this attempt to avoid duplicate error messages.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
}
else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix)
{
// color matrix not supported, so substitute the nearest equivalent program that doesn't use it.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode];
}
else
{
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode);
}
}
}
}
Clear();
@ -248,15 +240,18 @@ void PixelShaderCache::Clear()
void PixelShaderCache::Shutdown()
{
for(int i = 0;i < MAX_SSAA_SHADERS; i++)
{
if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release();
s_ColorMatrixProgram[i] = NULL;
if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release();
s_ColorCopyProgram[i] = NULL;
if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release();
s_DepthMatrixProgram[i] = NULL;
}
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
if(s_CopyProgram[copyMatrixType][depthType][ssaaMode]
&& (copyMatrixType == 0 || s_CopyProgram[copyMatrixType][depthType][ssaaMode] != s_CopyProgram[copyMatrixType-1][depthType][ssaaMode]))
s_CopyProgram[copyMatrixType][depthType][ssaaMode]->Release();
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
for(int depthType = 0; depthType < NUM_DEPTH_CONVERSION_TYPES; depthType++)
for(int ssaaMode = 0; ssaaMode < MAX_SSAA_SHADERS; ssaaMode++)
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
if (s_ClearProgram) s_ClearProgram->Release();
s_ClearProgram = NULL;
@ -296,8 +291,11 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
return (entry.shader != NULL);
}
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
// Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, components);
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_D3D9, maxConstants, components);
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
unique_shaders.insert(code_hash);

View File

@ -62,7 +62,14 @@ public:
static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode);
enum
{
DEPTH_CONVERSION_TYPE_NONE,
DEPTH_CONVERSION_TYPE_16BIT,
DEPTH_CONVERSION_TYPE_24BIT,
NUM_DEPTH_CONVERSION_TYPES
};
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, int depthConversionType);
static LPDIRECT3DPIXELSHADER9 GetClearProgram();
};

View File

@ -589,13 +589,22 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
D3DFORMAT bformat = FramebufferManager::GetEFBDepthRTSurfaceFormat();
int depthConversionType;
if(bformat == FOURCC_RAWZ)
depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE;
else if(bformat == FOURCC_DF16)
depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT;
else
depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT;
D3D::drawShadedTexQuad(
read_texture,
&RectToLock,
Renderer::GetFullTargetWidth(),
Renderer::GetFullTargetHeight(),
4, 4,
(FramebufferManager::GetEFBDepthRTSurfaceFormat() == FOURCC_RAWZ) ? PixelShaderCache::GetColorMatrixProgram(0) : PixelShaderCache::GetDepthMatrixProgram(0),
PixelShaderCache::GetDepthMatrixProgram(0, depthConversionType),
VertexShaderCache::GetSimpleVertexShader(0));
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);

View File

@ -131,12 +131,18 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
D3DFORMAT bformat = FramebufferManager::GetEFBDepthRTSurfaceFormat();
int SSAAMode = g_ActiveConfig.iMultisampleMode;
int depthConversionType;
if(bformat == FOURCC_RAWZ || bformat == D3DFMT_D24X8 || !bFromZBuffer)
depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_NONE;
else if(bformat == FOURCC_DF16)
depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_16BIT;
else
depthConversionType = PixelShaderCache::DEPTH_CONVERSION_TYPE_24BIT;
D3D::drawShadedTexQuad(read_texture, &sourcerect,
Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(),
virtualW, virtualH,
((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer) ?
PixelShaderCache::GetDepthMatrixProgram(SSAAMode) :
PixelShaderCache::GetColorMatrixProgram(SSAAMode),
PixelShaderCache::GetDepthMatrixProgram(SSAAMode, depthConversionType),
VertexShaderCache::GetSimpleVertexShader(SSAAMode));
Rendersurf->Release();

View File

@ -52,6 +52,7 @@ static LPDIRECT3DPIXELSHADER9 s_yuyvToRgbProgram = NULL;
// Not all slots are taken - but who cares.
const u32 NUM_ENCODING_PROGRAMS = 64;
static LPDIRECT3DPIXELSHADER9 s_encodingPrograms[NUM_ENCODING_PROGRAMS];
static bool s_encodingProgramsFailed[NUM_ENCODING_PROGRAMS];
void CreateRgbToYuyvProgram()
{
@ -121,6 +122,13 @@ LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format)
if (!s_encodingPrograms[format])
{
if(s_encodingProgramsFailed[format])
{
// we already failed to create a shader for this format,
// so instead of re-trying and showing the same error message every frame, just return.
return NULL;
}
const char* shader = TextureConversionShader::GenerateEncodingShader(format,API_D3D9);
#if defined(_DEBUG) || defined(DEBUGFAST)
@ -135,6 +143,7 @@ LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format)
s_encodingPrograms[format] = D3D::CompileAndCreatePixelShader(shader, (int)strlen(shader));
if (!s_encodingPrograms[format]) {
ERROR_LOG(VIDEO, "Failed to create encoding fragment program");
s_encodingProgramsFailed[format] = true;
}
}
return s_encodingPrograms[format];
@ -145,6 +154,7 @@ void Init()
for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
{
s_encodingPrograms[i] = NULL;
s_encodingProgramsFailed[i] = false;
}
for (unsigned int i = 0; i < NUM_TRANSFORM_BUFFERS; i++)
{

View File

@ -219,7 +219,7 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp
PSCacheEntry& newentry = PixelShaders[uid];
newentry.frameCount = frameCount;
pShaderLast = &newentry.shader;
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, 65536, components);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {