small code clean up:

d3d: remove the depth texture path; it will not work, so for now only lockable textures are available. I think the only way forward will be to use a secondary render target and do a depth pass to mimic depth textures.
opengl: minor cleanup here and there and a possible fix for issue 1509, but it is untested because the errors reported in the issue don't show up on my system.
common: code reorganization and an optimization in the pixel shader generator. While trying to understand the TEV stages, I made some changes to make the generator faster (not by much; so far only started with the alpha test and fog).
These changes are a first step toward resolving the differences between the hardware plugins and the software plugin.
I hope I didn't break anything, but please test this a lot.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4465 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2009-10-25 02:35:21 +00:00
parent 7bff609ad8
commit 227d363881
7 changed files with 211 additions and 268 deletions

View File

@ -154,13 +154,13 @@ const float epsilon8bit = 1.0f / 255.0f;
static const char *tevKSelTableC[] = // KCSEL
{
"1.0f,1.0f,1.0f", // 1 = 0x00
"0.875,0.875,0.875", // 7_8 = 0x01
"0.75,0.75,0.75", // 3_4 = 0x02
"0.625,0.625,0.625", // 5_8 = 0x03
"0.5,0.5,0.5", // 1_2 = 0x04
"0.375,0.375,0.375", // 3_8 = 0x05
"0.25,0.25,0.25", // 1_4 = 0x06
"0.125,0.125,0.125", // 1_8 = 0x07
"0.875f,0.875f,0.875f", // 7_8 = 0x01
"0.75f,0.75f,0.75f", // 3_4 = 0x02
"0.625f,0.625f,0.625f", // 5_8 = 0x03
"0.5f,0.5f,0.5f", // 1_2 = 0x04
"0.375f,0.375f,0.375f", // 3_8 = 0x05
"0.25f,0.25f,0.25f", // 1_4 = 0x06
"0.125f,0.125f,0.125f", // 1_8 = 0x07
"ERROR", // 0x08
"ERROR", // 0x09
"ERROR", // 0x0a
@ -266,7 +266,7 @@ static const char *tevCInputTable[] = // CC
"rastemp.rgb", // RASC,
"rastemp.aaa", // RASA,
"float3(1.0f,1.0f,1.0f)", // ONE,
"float3(.5f,.5f,.5f)", // HALF,
"float3(0.5f,0.5f,0.5f)", // HALF,
"konsttemp.rgb", // KONST,
"float3(0.0f,0.0f,0.0f)", // ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
@ -291,7 +291,7 @@ static const char *tevCInputTable2[] = // CC
"rastemp", // RASC,
"(rastemp.aaa)", // RASA,
"float3(1.0f,1.0f,1.0f)", // ONE
"float3(.5f,.5f,.5f)", // HALF
"float3(0.5f,0.5f,0.5f)", // HALF
"konsttemp", //"konsttemp.rgb", // KONST
"float3(0.0f,0.0f,0.0f)", // ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
@ -326,7 +326,7 @@ static const char *tevAInputTable2[] = // CA
"textemp", // TEXA,
"rastemp", // RASA,
"konsttemp", // KONST, (hw1 had quarter)
"float4(0,0,0,0)", // ZERO
"float4(0.0,0.0,0.0,0.0)", // ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
@ -342,7 +342,7 @@ static const char *tevRasTable[] =
"ERROR", //4
"alphabump", // use bump alpha
"(alphabump*(255.0f/248.0f))", //normalized
"float4(0,0,0,0)", // zero
"float4(0.0,0.0,0.0,0.0)", // zero
};
static const char *alphaRef[2] =
@ -473,7 +473,7 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
char* pmainstart = p;
WRITE(p, " float4 c0="I_COLORS"[1],c1="I_COLORS"[2],c2="I_COLORS"[3],prev=float4(0.0f,0.0f,0.0f,0.0f),textemp,rastemp,konsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float3 comp16 = float3(1,255,0), comp24 = float3(1,255,255*255);\n"
" float3 comp16 = float3(1.0f,255.0f,0.0f), comp24 = float3(1.0f,255.0f,255.0f*255.0f);\n"
" float4 alphabump=0;\n"
" float3 tevcoord;\n"
" float2 wrappedcoord, tempcoord;\n\n");
@ -533,9 +533,8 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, bool HLSL
{
// alpha test will always fail, so restart the shader and just make it an empty function
p = pmainstart;
WRITE(p, HLSL ? "clip(-1);" : "discard;\n");
//WRITE(p, "discard;\n");
WRITE(p, "ocol0 = 0;\n");
WRITE(p, HLSL ? "clip(-1);" : "discard;\n");
}
else
{
@ -578,19 +577,28 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
{
// write the bump alpha
if (bpmem.tevind[n].fmt == ITF_8)
WRITE(p, "alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]);
WRITE(p, "alphabump = indtex%d.%s %s;\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
tevIndAlphaScale[bpmem.tevind[n].fmt]);
else
{
// donkopunchstania: really bad way to do this
// cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0
// omitting fract seems to work as well
WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs]);
WRITE(p, "if (indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs]);
WRITE(p, " alphabump = 1.0f;\n");
WRITE(p, "else\n");
WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n", bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]);
WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
tevIndAlphaScale[bpmem.tevind[n].fmt]);
/*WRITE(p, " alphabump = indtex%d.%s %s;\n",
bpmem.tevind[n].bt,
tevIndAlphaSel[bpmem.tevind[n].bs],
tevIndAlphaScale[bpmem.tevind[n].fmt]);
WRITE(p, "if (alphabump > 1.0f ){ alphabump = fract ( alphabump );if (alphabump == 0.0f ) alphabump = 1.0f;}\n");*/
}
}
@ -669,7 +677,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
SampleTexture(p, "textemp", "tevcoord", texswap, texmap, texture_mask, HLSL);
}
else
WRITE(p, "textemp=float4(1,1,1,1);\n");
WRITE(p, "textemp=float4(1.0,1.0,1.0,1.0);\n");
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
@ -720,23 +728,41 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
case TEVCMP_R8_GT:
case TEVCMP_RGB8_GT: // per component compares
WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d], tevCInputTable2[cc.a], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable2[cc.b], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable[cc.c]);
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
cmp==TEVCMP_R8_GT?"r":"rgb",
tevCInputTable2[cc.b],
cmp==TEVCMP_R8_GT?"r":"rgb",
tevCInputTable[cc.c]);
break;
case TEVCMP_R8_EQ:
case TEVCMP_RGB8_EQ:
WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], epsilon8bit, tevCInputTable[cc.c]);
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
tevCInputTable2[cc.b],
epsilon8bit,
tevCInputTable[cc.c]);
break;
case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_GT?"16":"24", tevCInputTable[cc.c]);
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
tevCInputTable2[cc.b],
cmp==TEVCMP_GR16_GT?"16":"24",
tevCInputTable[cc.c]);
break;
case TEVCMP_GR16_EQ:
case TEVCMP_BGR24_EQ:
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f))",
tevCInputTable[cc.d], tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_EQ?"16":"24", epsilon8bit, tevCInputTable[cc.c]);
tevCInputTable[cc.d],
tevCInputTable2[cc.a],
tevCInputTable2[cc.b],
cmp==TEVCMP_GR16_EQ?"16":"24",
epsilon8bit,
tevCInputTable[cc.c]);
break;
default:
WRITE(p, "float3(0.0f,0.0f,0.0f)");
@ -785,23 +811,41 @@ static void WriteStage(char *&p, int n, u32 texture_mask, bool HLSL)
case TEVCMP_R8_GT:
case TEVCMP_A8_GT:
WRITE(p, " %s + ((%s.%s > %s.%s) ? %s : 0)",
tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]);
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
cmp==TEVCMP_R8_GT?"r":"a",
tevAInputTable2[ac.b],
cmp==TEVCMP_R8_GT?"r":"a",
tevAInputTable[ac.c]);
break;
case TEVCMP_R8_EQ:
case TEVCMP_A8_EQ:
WRITE(p, " %s + (abs(%s.r - %s.r)<%f ? %s : 0)",
tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon8bit,tevAInputTable[ac.c]);
WRITE(p, " %s + (abs(%s.r - %s.r)<= %f ? %s : 0)",
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
tevAInputTable2[ac.b],
epsilon8bit,
tevAInputTable[ac.c]);
break;
case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte)
case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r
WRITE(p, " %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)",
tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]);
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
tevAInputTable2[ac.b],
cmp==TEVCMP_GR16_GT?"16":"24",
tevAInputTable[ac.c]);
break;
case TEVCMP_GR16_EQ:
case TEVCMP_BGR24_EQ:
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)",
tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_EQ?"16":"24",epsilon8bit,tevAInputTable[ac.c]);
WRITE(p, " %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<=%f ? %s : 0)",
tevAInputTable[ac.d],
tevAInputTable2[ac.a],
tevAInputTable2[ac.b],
cmp==TEVCMP_GR16_EQ?"16":"24",
epsilon8bit,
tevAInputTable[ac.c]);
break;
default:
WRITE(p, "0)");
@ -851,21 +895,37 @@ void SampleTexture(char *&p, const char *destination, const char *texcoords, con
}
}
static void WriteAlphaCompare(char *&p, int num, int comp)
static const char *tevAlphaFuncsTable[] =
{
switch(comp)
{
case ALPHACMP_ALWAYS: WRITE(p, "(false)"); break;
case ALPHACMP_NEVER: WRITE(p, "(true)"); break;
case ALPHACMP_LEQUAL: WRITE(p, "(prev.a > %s)",alphaRef[num]); break;
case ALPHACMP_LESS: WRITE(p, "(prev.a >= %s - %f)",alphaRef[num],epsilon8bit*0.5f);break;
case ALPHACMP_GEQUAL: WRITE(p, "(prev.a < %s)",alphaRef[num]); break;
case ALPHACMP_GREATER: WRITE(p, "(prev.a <= %s + %f)",alphaRef[num],epsilon8bit*0.5f);break;
case ALPHACMP_EQUAL: WRITE(p, "(abs(prev.a-%s)>%f)",alphaRef[num],epsilon8bit*2); break;
case ALPHACMP_NEQUAL: WRITE(p, "(abs(prev.a-%s)<%f)",alphaRef[num],epsilon8bit*2); break;
default: PanicAlert("Bad Alpha Compare! %08x", comp);
}
}
"(false)", //ALPHACMP_NEVER 0
"(prev.a < %s + %f)", //ALPHACMP_LESS 1
"(abs( prev.a - %s ) <= %f)", //ALPHACMP_EQUAL 2
"(prev.a <= %s + %f)", //ALPHACMP_LEQUAL 3
"(prev.a > %s - %f)", //ALPHACMP_GREATER 4
"(abs( prev.a - %s ) > %f)", //ALPHACMP_NEQUAL 5
"(prev.a >= %s - %f)", //ALPHACMP_GEQUAL 6
"(true)" //ALPHACMP_ALWAYS 7
};
// Tolerance applied by each alpha comparison, indexed by the ALPHACMP_* value
// (0..7) noted next to every entry. WriteAlphaTest passes the selected entry as
// the float argument that fills the %f in the matching tevAlphaFuncsTable
// format string, so both tables must keep the same ordering.
// NEVER/ALWAYS use no tolerance, EQUAL/NEQUAL use one full epsilon8bit step and
// the ordered comparisons use half a step.
static const float tevAlphaDeltas[] =
{
0.0f, //ALPHACMP_NEVER 0
epsilon8bit*0.5f, //ALPHACMP_LESS 1
epsilon8bit, //ALPHACMP_EQUAL 2
epsilon8bit*0.5f, //ALPHACMP_LEQUAL 3
epsilon8bit*0.5f, //ALPHACMP_GREATER 4
epsilon8bit, //ALPHACMP_NEQUAL 5
epsilon8bit*0.5f, //ALPHACMP_GEQUAL 6
0.0f //ALPHACMP_ALWAYS 7
};
// Shader operator text used to join the two alpha comparison expressions.
// WriteAlphaTest indexes this table with bpmem.alphaFunc.logic % 4, so the
// entry order must match the hardware encoding of the logic field.
// XOR and XNOR are written as != and == between the two boolean comparison
// results.
static const char *tevAlphaFunclogicTable[] =
{
" && ", // and
" || ", // or
" != ", // xor
" == " // xnor
};
static bool WriteAlphaTest(char *&p, bool HLSL)
{
@ -876,38 +936,22 @@ static bool WriteAlphaTest(char *&p, bool HLSL)
switch(op)
{
case 0: // AND
if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS)
return true;
if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER)
{
WRITE(p, HLSL ? "clip(-1);" : "discard;\n");
return false;
}
if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) return false;
break;
case 1: // OR
if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS)
return true;
if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)
{
WRITE(p, HLSL ? "clip(-1);" : "discard;\n");
return false;
}
if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true;
if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER)return false;
break;
case 2: // XOR
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS))
return true;
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER))
{
WRITE(p, HLSL ? "clip(-1);" : "discard;\n");
return false;
}
return false;
break;
case 3: // XNOR
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS))
{
WRITE(p, HLSL ? "clip(-1);" : "discard;\n");
return false;
}
return false;
if ((comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER))
return true;
break;
@ -918,78 +962,67 @@ static bool WriteAlphaTest(char *&p, bool HLSL)
if (HLSL)
WRITE(p, "clip( ");
else
WRITE(p, "discard( ");
WRITE(p, "discard(!( ");
WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0);
int compindex = bpmem.alphaFunc.comp0 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0],tevAlphaDeltas[compindex]);
// negated because testing the inverse condition
switch (bpmem.alphaFunc.logic)
{
case 0: WRITE(p, " || "); break; // and
case 1: WRITE(p, " && "); break; // or
case 2: WRITE(p, " == "); break; // xor
case 3: WRITE(p, " != "); break; // xnor
default: break;
}
WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1);
WRITE(p, tevAlphaFunclogicTable[bpmem.alphaFunc.logic % 4]);
compindex = bpmem.alphaFunc.comp1 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1],tevAlphaDeltas[compindex]);
if (HLSL) {
// clip works differently than discard - discard takes a bool, clip takes a value that kills the pixel on negative
WRITE(p, " ? -1 : 1);\n");
WRITE(p, " ? 1 : -1);\n");
} else {
WRITE(p, ");\n");
WRITE(p, "));\n");
}
return true;
}
// Shader statements that reshape the linear "fog" factor, indexed by
// bpmem.fog.c_proj_fsel.fsel (0..7). WriteFog only reads this table when
// fsel > 3; entries 0..3 are empty placeholders that keep the indices aligned.
// Each entry is handed to WRITE as the printf-style format string itself, so
// an entry must never contain a '%' character.
static const char *tevFogFuncsTable[] =
{
"", //No Fog
"", //?
"", //Linear
"", //?
" fog = 1.0f - pow(2, -8.0f * fog);\n", //exp
" fog = 1.0f - pow(2, -8.0f * fog * fog);\n", //exp2
" fog = pow(2, -8.0f * (1.0f - fog));\n", //backward exp
" fog = 1.0f - fog;\n fog = pow(2, -8.0f * fog * fog);\n" //backward exp2
};
static void WriteFog(char *&p)
{
bool enabled = bpmem.fog.c_proj_fsel.fsel == 0 ? false : true;
if(bpmem.fog.c_proj_fsel.fsel == 0)return;//no Fog
if (enabled)
if (bpmem.fog.c_proj_fsel.proj == 0)
{
if (bpmem.fog.c_proj_fsel.proj == 0)
{
// perspective
// ze = A/(B - Zs)
WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n");
}
else
{
// orthographic
// ze = a*Zs
WRITE (p, " float ze = "I_FOG"[1].x * depth;\n");
}
//WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n");
WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n");
switch (bpmem.fog.c_proj_fsel.fsel)
{
case 0: // TODO - No fog?
break;
case 2: // linear
// empty
break;
case 4: // exp
WRITE(p, " fog = 1.0f - pow(2, -8.0f * fog);\n");
break;
case 5: // exp2
WRITE(p, " fog = 1.0f - pow(2, -8.0f * fog * fog);\n");
break;
case 6: // backward exp
WRITE(p, " fog = 1.0f - fog;\n");
WRITE(p, " fog = pow(2, -8.0f * fog);\n");
break;
case 7: // backward exp2
WRITE(p, " fog = 1.0f - fog;\n");
WRITE(p, " fog = pow(2, -8.0f * fog * fog);\n");
break;
default: WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel);
}
WRITE(p, " prev.rgb = (1.0f - fog) * prev.rgb + (fog * "I_FOG"[0].rgb);\n");
// perspective
// ze = A/(B - Zs)
WRITE (p, " float ze = "I_FOG"[1].x / ("I_FOG"[1].y - depth);\n");
}
else
{
// orthographic
// ze = a*Zs
WRITE (p, " float ze = "I_FOG"[1].x * depth;\n");
}
//WRITE (p, " float fog = clamp(ze - "I_FOG"[1].z, 0.0f, 1.0f);\n");
WRITE (p, " float fog = saturate(ze - "I_FOG"[1].z);\n");
if(bpmem.fog.c_proj_fsel.fsel > 3)
{
WRITE(p, tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]);
}
else
{
if(bpmem.fog.c_proj_fsel.fsel != 2)
WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel);
}
WRITE(p, " prev.rgb = (1.0f - fog) * prev.rgb + (fog * "I_FOG"[0].rgb);\n");
}

View File

@ -30,6 +30,7 @@
#define WRITE p+=sprintf
static char text[16384];
static bool IntensityConstantAdded = false;
namespace TextureConversionShader
{
@ -157,7 +158,13 @@ void WriteSampleColor(char*& p, const char* colorComp, const char* dest)
void WriteColorToIntensity(char*& p, const char* src, const char* dest)
{
WRITE(p, " %s = (0.257f * %s.r) + (0.504f * %s.g) + (0.098f * %s.b) + 0.0625f;\n", dest, src, src, src);
if(!IntensityConstantAdded)
{
WRITE(p, " float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n");
IntensityConstantAdded = true;
}
//WRITE(p, " %s = (0.257f * %s.r) + (0.504f * %s.g) + (0.098f * %s.b) + 0.0625f;\n", dest, src, src, src);
WRITE(p, " %s = dot(IntensityConst.rgb, %s.rgb) + IntensityConst.a;\n", dest, src);
}
void WriteIncrementSampleX(char*& p)
@ -171,6 +178,12 @@ void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest)
WRITE(p, " %s = floor(%s * %ff);\n", dest, src, result);
}
// Finishes a generated encoder shader: writes the closing brace of the shader
// function and clears IntensityConstantAdded so that the next shader built
// through WriteColorToIntensity declares its IntensityConst constant again.
// p is received by value, so the caller's write pointer is NOT advanced by
// this call; the callers visible here invoke it as their final statement.
void WriteEncoderEnd(char* p)
{
WRITE(p, "}\n");
// Reset the per-shader flag; the declaration must be emitted once per shader.
IntensityConstantAdded = false;
}
void WriteI8Encoder(char* p)
{
WriteSwizzler(p, GX_TF_I8);
@ -191,7 +204,7 @@ void WriteI8Encoder(char* p)
WriteSampleColor(p, "rgb", "texSample");
WriteColorToIntensity(p, "texSample", "ocol0.a");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteI4Encoder(char* p)
@ -236,7 +249,7 @@ void WriteI4Encoder(char* p)
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteIA8Encoder(char* p)
@ -253,7 +266,7 @@ void WriteIA8Encoder(char* p)
WRITE(p, " ocol0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.a");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteIA4Encoder(char* p)
@ -286,7 +299,7 @@ void WriteIA4Encoder(char* p)
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteRGB565Encoder(char* p)
@ -321,7 +334,7 @@ void WriteRGB565Encoder(char* p)
WRITE(p, " ocol0.a = ocol0.a + gLower * 32.0f;\n");
WRITE(p, " ocol0 = ocol0 / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteRGB5A3Encoder(char* p)
@ -388,7 +401,7 @@ void WriteRGB5A3Encoder(char* p)
WRITE(p, "}\n");
WRITE(p, " ocol0 = ocol0 / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteRGBA4443Encoder(char* p)
@ -414,7 +427,7 @@ void WriteRGBA4443Encoder(char* p)
WriteToBitDepth(p, 4, "texSample.b", "color1.a");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteRGBA8Encoder(char* p)
@ -444,7 +457,7 @@ void WriteRGBA8Encoder(char* p)
WRITE(p, " ocol0 = (cl0 * color0) + (cl1 * color1);\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteC4Encoder(char* p, const char* comp)
@ -480,7 +493,7 @@ void WriteC4Encoder(char* p, const char* comp)
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteC8Encoder(char* p, const char* comp)
@ -498,7 +511,7 @@ void WriteC8Encoder(char* p, const char* comp)
WriteSampleColor(p, comp, "ocol0.a");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteCC4Encoder(char* p, const char* comp)
@ -531,7 +544,7 @@ void WriteCC4Encoder(char* p, const char* comp)
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteCC8Encoder(char* p, const char* comp)
@ -543,7 +556,7 @@ void WriteCC8Encoder(char* p, const char* comp)
WriteSampleColor(p, comp, "ocol0.ra");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteZ8Encoder(char* p, const char* multiplier)
@ -567,7 +580,7 @@ void WriteZ8Encoder(char* p, const char* multiplier)
WriteSampleColor(p, "b", "depth");
WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier);
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteZ16Encoder(char* p)
@ -588,7 +601,7 @@ void WriteZ16Encoder(char* p)
WRITE(p, " ocol0.r = frac(depth * 256.0f);\n");
WRITE(p, " ocol0.a = depth;\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteZ16LEncoder(char* p)
@ -609,7 +622,7 @@ void WriteZ16LEncoder(char* p)
WRITE(p, " ocol0.r = frac(depth * 65536.0f);\n");
WRITE(p, " ocol0.a = frac(depth * 256.0f);\n");
WRITE(p, "}\n");
WriteEncoderEnd(p);
}
void WriteZ24Encoder(char* p)
@ -637,8 +650,8 @@ void WriteZ24Encoder(char* p)
WRITE(p, " ocol0.g = frac(depth0 * 65536.0f);\n");
WRITE(p, " ocol0.r = 1.0f;\n");
WRITE(p, " ocol0.a = frac(depth0 * 65536.0f);\n");
WRITE(p, " }\n"
"}\n");
WRITE(p, " }\n");
WriteEncoderEnd(p);
}
const char *GenerateEncodingShader(u32 format)

View File

@ -40,7 +40,7 @@ static LPDIRECT3DSURFACE9 s_efb_depth_OffScreenReadBuffer;
static D3DFORMAT s_efb_color_surface_Format;
static D3DFORMAT s_efb_depth_surface_Format;
#undef CHECK
#define CHECK(hr,Message) //if (FAILED(hr)) { PanicAlert(__FUNCTION__ " FAIL: %s" ,Message); }
#define CHECK(hr,Message) if (FAILED(hr)) { PanicAlert(__FUNCTION__ " FAIL: %s" ,Message); }
@ -102,87 +102,11 @@ void Create()
if (g_ActiveConfig.bEFBAccessEnable)
{
//depth format in prefered order
D3DFORMAT *DepthTexFormats = new D3DFORMAT[7];
DepthTexFormats[0] = (D3DFORMAT)MAKEFOURCC('D','F','2','4');
DepthTexFormats[1] = (D3DFORMAT)MAKEFOURCC('I','N','T','Z');
DepthTexFormats[2] = (D3DFORMAT)MAKEFOURCC('R','A','W','Z');
DepthTexFormats[3] = (D3DFORMAT)MAKEFOURCC('D','F','1','6');
DepthTexFormats[4] = D3DFMT_D32F_LOCKABLE;
DepthTexFormats[5] = D3DFMT_D16_LOCKABLE;
DepthTexFormats[6] = D3DFMT_D24X8;
for (int i = 0;i<4;i++)
{
s_efb_depth_surface_Format = DepthTexFormats[i];
hr = D3D::dev->CreateTexture(target_width, target_height, 1, D3DUSAGE_DEPTHSTENCIL, s_efb_depth_surface_Format,
D3DPOOL_DEFAULT, &s_efb_depth_texture, NULL);
if (!FAILED(hr))
break;
}
CHECK(hr,"Create Depth Texture");
if (!FAILED(hr))
{
//we found a dept texture suported by hardware so get the surface to draw to
hr = s_efb_depth_texture->GetSurfaceLevel(0, &s_efb_depth_surface);
CHECK(hr,"Get Depth Surface");
//create a buffer texture for peeking
hr = D3D::dev->CreateTexture(1, 1, 1, D3DUSAGE_DEPTHSTENCIL, s_efb_depth_surface_Format,
D3DPOOL_DEFAULT, &s_efb_depthBuffer_texture, NULL);
CHECK(hr,"Create Depth Pixel Texture");
if (!FAILED(hr))
{
//texture create correctly so get the surface
hr = s_efb_depthBuffer_texture->GetSurfaceLevel(0, &s_efb_depth_ReadBuffer);
CHECK(hr,"Get Depth Pixel Surface");
// create an ofscren surface to grab the data
hr = D3D::dev->CreateOffscreenPlainSurface(1, 1, s_efb_depth_surface_Format, D3DPOOL_SYSTEMMEM, &s_efb_depth_OffScreenReadBuffer, NULL );
CHECK(hr,"Create Depth offscreen Surface");
if (FAILED(hr))
{
//no depth in system mem so try vista path to grab depth data
//create a offscreen lockeable surface
hr = D3D::dev->CreateOffscreenPlainSurface(1, 1, D3DFMT_D32F_LOCKABLE, D3DPOOL_DEFAULT, &s_efb_depth_OffScreenReadBuffer, NULL );
CHECK(hr, "Create Depth D3DFMT_D32F_LOCKABLE offscreen Surface");
if (s_efb_depth_ReadBuffer)
s_efb_depth_ReadBuffer->Release();
//this is ugly but is a fast way to test wich path to proceed for peeking
s_efb_depth_ReadBuffer = s_efb_depth_OffScreenReadBuffer;
s_efb_depth_surface_Format = D3DFMT_D32F_LOCKABLE;
}
}
}
if (!FAILED(hr))
{
//so far so god, texture depth works so return
delete [] DepthTexFormats;
return;
}
else
{
//no depth texture... cleanup
if(s_efb_depth_ReadBuffer)
s_efb_depth_ReadBuffer->Release();
s_efb_depth_ReadBuffer = NULL;
if(s_efb_depth_OffScreenReadBuffer)
s_efb_depth_OffScreenReadBuffer->Release();
if(s_efb_depth_surface)
s_efb_depth_surface->Release();
s_efb_depth_surface = NULL;
if(s_efb_depthBuffer_texture)
s_efb_depthBuffer_texture->Release();
s_efb_depthBuffer_texture=NULL;
if(s_efb_depth_texture)
s_efb_depth_texture->Release();
s_efb_depth_texture = NULL;
}
// no depth textures... try to create an lockable depth surface
for(int i = 4;i<7;i++)
D3DFORMAT *DepthTexFormats = new D3DFORMAT[3];
DepthTexFormats[0] = D3DFMT_D32F_LOCKABLE;
DepthTexFormats[1] = D3DFMT_D16_LOCKABLE;
DepthTexFormats[2] = D3DFMT_D24X8;
for(int i = 0;i<3;i++)
{
s_efb_depth_surface_Format = DepthTexFormats[i];
hr = D3D::dev->CreateDepthStencilSurface(target_width, target_height, s_efb_depth_surface_Format,
@ -190,6 +114,7 @@ void Create()
if (!FAILED(hr)) break;
}
s_efb_depth_ReadBuffer = s_efb_depth_surface;
s_efb_depth_OffScreenReadBuffer = s_efb_depth_surface;
CHECK(hr,"CreateDepthStencilSurface");
delete [] DepthTexFormats;
}

View File

@ -534,26 +534,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
RectToLock.left = 0;
RectToLock.right = 1;
RectToLock.top = 0;
}
else
{
//like i say in FramebufferManager this is ugly... using the pointers to decide the peek path.. but it works:)
if(BufferFormat == D3DFMT_D32F_LOCKABLE && RBuffer == pOffScreenBuffer)
{
//we are using vista path so use updateSurface to copy depth data
hr = D3D::dev->UpdateSurface(pBuffer,&RectToLock,pOffScreenBuffer,NULL);
if(FAILED(hr))
{
PanicAlert("Unable to update data to mem buffer");
return 0;
}
}
else
{
//we are using lockable depth buffer so change the pointer to lock it directly
pOffScreenBuffer = pBuffer;
}
}
}
//the surface is good.. lock it
if((hr = pOffScreenBuffer->LockRect(&drect, &RectToLock, D3DLOCK_READONLY)) != D3D_OK)
{
@ -571,7 +552,6 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
val = ((float *)drect.pBits)[0];
z = ((u32)(val * 0xffffff));// 0xFFFFFFFF;
break;
case (D3DFORMAT)MAKEFOURCC('D','F','1','6'):
case D3DFMT_D16_LOCKABLE:
val = ((float)((u16 *)drect.pBits)[0])/((float)0xFFFF);
z = ((u32)(val * 0xffffff));

View File

@ -286,8 +286,6 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
glGenProgramsARB(1, &ps.glprogid);
EnableShader(ps.glprogid);
//glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ps.glprogid);
//CurrentShader = ps.glprogid;
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
err = GL_REPORT_ERROR();

View File

@ -144,8 +144,14 @@ static const GLenum glSrcFactors[8] =
};
static const GLenum glDestFactors[8] = {
GL_ZERO, GL_ONE, GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR,
GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
GL_SRC_ALPHA,
GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA
};
void SetDefaultRectTexParams()
@ -538,14 +544,9 @@ void Renderer::RestoreAPIState()
void Renderer::SetColorMask()
{
if (bpmem.blendmode.alphaupdate && bpmem.blendmode.colorupdate)
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
else if (bpmem.blendmode.alphaupdate)
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
else if (bpmem.blendmode.colorupdate)
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_FALSE);
else
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
GLenum ColorMask = (bpmem.blendmode.colorupdate) ? GL_TRUE : GL_FALSE;
GLenum AlphaMask = (bpmem.blendmode.alphaupdate) ? GL_TRUE : GL_FALSE;
glColorMask(ColorMask, ColorMask, ColorMask, AlphaMask);
}
void Renderer::SetBlendMode(bool forceUpdate)
@ -1001,14 +1002,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
}
// ---------------------------------------------------------------------
GL_REPORT_ERRORD();
/*for (int i = 0; i < 8; i++) {
glActiveTexture(GL_TEXTURE0 + i);
glDisable(GL_TEXTURE_2D);
glDisable(GL_TEXTURE_RECTANGLE_ARB);
}
glActiveTexture(GL_TEXTURE0);*/
DrawDebugText();
GL_REPORT_ERRORD();

View File

@ -280,7 +280,7 @@ void VertexShaderCache::DisableShader()
void VertexShaderCache::SetCurrentShader(GLuint Shader)
{
if(ShaderEnabled && CurrentShader != Shader)
if(ShaderEnabled /*&& CurrentShader != Shader*/)
{
CurrentShader = Shader;
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader);