Multithreadded Shadergen: Second pass over Pixel Shadergen.

Note: It's not 100% perfect, as some of the GPU capablities leak into the
pixel shader UID.

Currently our UIDs don't get exported, so there is no issue. But someone
might want to fix this in the future.
This commit is contained in:
Scott Mansell 2016-01-17 01:41:26 +13:00
parent 1a831cfc7d
commit 24e5d21780
6 changed files with 238 additions and 188 deletions

View File

@ -556,10 +556,10 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode)
{
PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode, API_D3D);
PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode);
if (g_ActiveConfig.bEnableShaderDebugging)
{
ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D);
ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData());
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}
@ -588,7 +588,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode)
}
// Need to compile a new shader
ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D);
ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData());
D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))

View File

@ -163,7 +163,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g
SetCurrentPrimitiveTopology(gs_primitive_type);
GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type);
PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D);
PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode);
VertexShaderUid vs_uid = GetVertexShaderUid();
bool gs_changed = gs_uid != s_last_geometry_shader_uid;
@ -263,7 +263,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_
if (g_ActiveConfig.bEnableShaderDebugging)
{
ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D);
ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData());
s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p");
}
@ -275,7 +275,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_
}
else
{
ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D);
ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData());
ID3DBlob* ps_bytecode = nullptr;
if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode))

View File

@ -81,12 +81,12 @@ protected:
PixelShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type,
API_TYPE api_type) override
{
return GetPixelShaderUid(dst_alpha_mode, api_type);
return GetPixelShaderUid(dst_alpha_mode);
}
ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type,
PixelShaderUid) override
PixelShaderUid uid) override
{
return GeneratePixelShaderCode(dst_alpha_mode, api_type);
return GeneratePixelShaderCode(dst_alpha_mode, api_type, uid.GetUidData());
}
};

View File

@ -210,7 +210,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_
newentry.in_cache = 0;
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData());
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL);
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid.puid.GetUidData());
ShaderCode gcode;
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders &&
!uid.guid.GetUidData()->IsPassthrough())
@ -397,13 +397,13 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type)
{
uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL);
uid->puid = GetPixelShaderUid(dstAlphaMode);
uid->vuid = GetVertexShaderUid();
uid->guid = GetGeometryShaderUid(primitive_type);
if (g_ActiveConfig.bEnableShaderDebugging)
{
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL);
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid->puid.GetUidData());
pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData());

View File

@ -151,28 +151,14 @@ static const char* tevRasTable[] = {
static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb"};
static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"};
template <class T>
static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType);
template <class T>
static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift);
template <class T>
static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap,
bool stereo, API_TYPE ApiType);
template <class T>
static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
template <class T>
static void WriteFog(T& out, pixel_shader_uid_data* uid_data);
template <class T>
static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support)
// leak
// into this UID; This is really unhelpful if these UIDs ever move from one machine to
// another.
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode)
{
T out;
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
pixel_shader_uid_data dummy_data;
pixel_shader_uid_data* uid_data = out.template GetUidData<pixel_shader_uid_data>();
if (uid_data == nullptr)
uid_data = &dummy_data;
PixelShaderUid out;
pixel_shader_uid_data* uid_data = out.GetUidData<pixel_shader_uid_data>();
memset(uid_data, 0, sizeof(*uid_data));
uid_data->dstAlphaMode = dstAlphaMode;
@ -185,6 +171,186 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
u32 numStages = uid_data->genMode_numtevstages + 1;
const bool forced_early_z =
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() &&
(g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
const bool per_pixel_depth =
(bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) ||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->per_pixel_depth = per_pixel_depth;
uid_data->forced_early_z = forced_early_z;
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() &&
(!uid_data->fast_depth_calc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED))
{
static bool warn_once = true;
if (warn_once)
WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current "
"configuration. Make sure to enable fast depth calculations. If this message "
"still shows up your hardware isn't able to emulate the feature properly (a "
"GPU with D3D 11.0 / OGL 4.2 support is required).");
warn_once = false;
}
if (uid_data->per_pixel_lighting)
{
// The lighting shader only needs the two color bits of the 23bit component bit array.
uid_data->components =
(VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT;
;
GetLightingShaderUid(uid_data->lighting);
}
if (uid_data->genMode_numtexgens > 0)
{
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
{
// optional perspective divides
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
}
}
// indirect texture map lookup
int nIndirectStagesUsed = 0;
if (uid_data->genMode_numindstages > 0)
{
for (unsigned int i = 0; i < numStages; ++i)
{
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages)
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)
{
if (uid_data->nIndirectStagesUsed & (1 << i))
uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i));
}
for (unsigned int n = 0; n < numStages; n++)
{
int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
// HACK to handle cases where the tex gen is not enabled
if (!bHasTexCoord)
texcoord = bpmem.genMode.numtexgens;
uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
uid_data->stagehash[n].tevorders_texcoord = texcoord;
if (uid_data->stagehash[n].hasindstage)
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC;
uid_data->stagehash[n].cc = cc.hex & 0xFFFFFF;
uid_data->stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later
if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA ||
cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC ||
cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA ||
ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
const int i = bpmem.combiners[n].alphaC.rswap;
uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);
}
uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
if (uid_data->stagehash[n].tevorders_enable)
{
const int i = bpmem.combiners[n].alphaC.tswap;
uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);
}
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST ||
cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST ||
ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1);
uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1);
}
}
#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
uid_data->num_values = (uid_data->per_pixel_lighting) ?
sizeof(*uid_data) :
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
uid_data->Pretest = Pretest;
uid_data->late_ztest = bpmem.UseLateDepthTest();
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
// (in this case we need to write a depth value if depth test passes regardless of the alpha
// testing result)
if (uid_data->Pretest == AlphaTest::UNDETERMINED ||
(uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest))
{
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
// ZCOMPLOC HACK:
// The only way to emulate alpha test + early-z is to force early-z in the shader.
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
// we are only able to choose which one we want to respect more.
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important that a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
uid_data->alpha_test_use_zcomploc_hack =
bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
!g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze;
}
uid_data->zfreeze = bpmem.genMode.zfreeze;
uid_data->ztex_op = bpmem.ztex2.op;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
}
return out;
}
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
API_TYPE ApiType);
static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp,
int shift);
static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap,
bool stereo, API_TYPE ApiType);
static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType,
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,
const pixel_shader_uid_data* uid_data)
{
ShaderCode out;
u32 numStages = uid_data->genMode_numtevstages + 1;
out.Write("//Pixel Shader for TEV stages\n");
out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens,
uid_data->genMode_numindstages);
@ -281,27 +447,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
}
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, ApiType, uid_data->genMode_numtexgens,
uid_data->per_pixel_lighting, "");
GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting,
"");
out.Write("};\n");
{
const bool forced_early_z =
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() &&
(g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
const bool per_pixel_depth =
(bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) ||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->per_pixel_depth = per_pixel_depth;
uid_data->forced_early_z = forced_early_z;
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
}
if (uid_data->forced_early_z)
{
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
@ -360,9 +509,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
warn_once = false;
}
uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
if (ApiType == API_OPENGL)
{
out.Write("out vec4 ocol0;\n");
@ -375,7 +521,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
out.Write("in VertexData {\n");
GenerateVSOutputMembers<T>(
GenerateVSOutputMembers(
out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting,
GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true));
@ -478,20 +624,13 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
"\tfloat3 ldir, h, cosAttn, distAttn;\n"
"\tfloat dist, dist2, attn;\n");
// The lighting shader only needs the two color bits of the 23bit component bit array.
uid_data->components =
(VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT;
;
// TODO: Our current constant usage code isn't able to handle more than one buffer.
// So we can't mark the VS constant as used here. But keep them here as reference.
// out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
// out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
// out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
// FIXME: Disabled until pixelshadergen is split
// GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
// "colors_", "col");
GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
"colors_", "col");
}
// HACK to handle cases where the tex gen is not enabled
@ -506,7 +645,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{
out.Write("\tint2 fixpoint_uv%d = itrunc(", i);
// optional perspective divides
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
{
out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i);
@ -520,24 +658,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
}
}
// indirect texture map lookup
int nIndirectStagesUsed = 0;
if (uid_data->genMode_numindstages > 0)
{
for (unsigned int i = 0; i < numStages; ++i)
{
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages)
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)
{
if (uid_data->nIndirectStagesUsed & (1 << i))
{
uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i));
unsigned int texcoord = uid_data->GetTevindirefCoord(i);
unsigned int texmap = uid_data->GetTevindirefMap(i);
@ -551,17 +675,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
out.Write("\ttempcoord = int2(0, 0);\n");
out.Write("\tint3 iindtex%d = ", i);
SampleTexture<T>(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType);
SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType);
}
}
for (unsigned int i = 0; i < numStages; i++)
WriteStage<T>(out, uid_data, i, ApiType); // build the equation for this stage
#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
uid_data->num_values = (uid_data->per_pixel_lighting) ?
sizeof(*uid_data) :
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
WriteStage(out, uid_data, i, ApiType); // build the equation for this stage
{
// The results of the last texenv stage are put onto the screen,
@ -581,18 +700,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
}
out.Write("\tprev = prev & 255;\n");
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
uid_data->Pretest = Pretest;
uid_data->late_ztest = bpmem.UseLateDepthTest();
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
// (in this case we need to write a depth value if depth test passes regardless of the alpha
// testing result)
if (uid_data->Pretest == AlphaTest::UNDETERMINED ||
(uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest))
WriteAlphaTest<T>(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth);
uid_data->zfreeze = bpmem.genMode.zfreeze;
WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth);
if (uid_data->zfreeze)
{
@ -629,10 +742,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
}
out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");
uid_data->ztex_op = bpmem.ztex2.op;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
// depth texture can safely be ignored if the result won't be written to the depth buffer
// (early_ztest) and isn't used for fog either
const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel;
@ -674,7 +783,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
}
else
{
WriteFog<T>(out, uid_data);
WriteFog(out, uid_data);
out.Write("\tocol0 = float4(prev) / 255.0;\n");
}
@ -704,25 +813,20 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
return out;
}
template <class T>
static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType)
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
API_TYPE ApiType)
{
int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
auto& stage = uid_data->stagehash[n];
out.Write("\n\t// TEV stage %d\n", n);
// HACK to handle cases where the tex gen is not enabled
u32 texcoord = stage.tevorders_texcoord;
bool bHasTexCoord = texcoord < uid_data->genMode_numtexgens;
if (!bHasTexCoord)
texcoord = 0;
out.Write("\n\t// TEV stage %d\n", n);
auto& stage = uid_data->stagehash[n];
uid_data->stagehash[n].hasindstage = bHasIndStage;
uid_data->stagehash[n].tevorders_texcoord = texcoord;
if (stage.hasindstage)
{
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
TevStageIndirect tevind;
tevind.hex = stage.tevind;
@ -785,7 +889,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n,
stage.tevorders_texcoord, n);
texcoord, n);
out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n",
mtxidx, n, mtxidx);
@ -798,7 +902,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n,
stage.tevorders_texcoord, n);
texcoord, n);
out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n",
mtxidx, n, mtxidx);
@ -823,20 +927,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
// wrap S
if (tevind.sw == ITW_OFF)
out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", stage.tevorders_texcoord);
out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", texcoord);
else if (tevind.sw == ITW_0)
out.Write("\twrappedcoord.x = 0;\n");
else
out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", stage.tevorders_texcoord,
out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", texcoord,
tevIndWrapStart[tevind.sw]);
// wrap T
if (tevind.tw == ITW_OFF)
out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", stage.tevorders_texcoord);
out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord);
else if (tevind.tw == ITW_0)
out.Write("\twrappedcoord.y = 0;\n");
else
out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", stage.tevorders_texcoord,
out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", texcoord,
tevIndWrapStart[tevind.tw]);
if (tevind.fb_addprev) // add previous tevcoord
@ -848,10 +952,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
}
uid_data->stagehash[n].cc = bpmem.combiners[n].colorC.hex & 0xFFFFFF;
uid_data->stagehash[n].ac =
bpmem.combiners[n].alphaC.hex & 0xFFFFF0; // Storing rswap and tswap later
TevStageCombiner::ColorCombiner cc;
TevStageCombiner::AlphaCombiner ac;
cc.hex = stage.cc;
@ -862,35 +962,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA ||
ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
const int i = bpmem.combiners[n].alphaC.rswap;
uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap;
ac.rswap = bpmem.combiners[n].alphaC.rswap;
uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);
// Generate swizzle string to represent the Ras color channel swapping
char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a],
"rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'};
out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap);
}
uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
if (stage.tevorders_enable)
{
int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);
const int i = bpmem.combiners[n].alphaC.tswap;
uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2;
ac.tswap = bpmem.combiners[n].alphaC.tswap;
uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);
// Generate swizzle string to represent the texture color channel swapping
char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c],
"rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'};
if (!bHasIndStage)
if (!stage.hasindstage)
{
// calc tevcord
if (bHasTexCoord)
@ -898,12 +983,9 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
else
out.Write("\ttevcoord.xy = int2(0, 0);\n");
}
char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c],
"rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'};
out.Write("\ttextemp = ");
SampleTexture<T>(out, "float2(tevcoord.xy)", texswap, texmap, uid_data->stereo, ApiType);
SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, uid_data->stereo,
ApiType);
}
else
{
@ -914,8 +996,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST ||
ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1);
uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1);
out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc],
tevKSelTableA[stage.tevksel_ka]);
@ -1014,8 +1094,8 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.Write(";\n");
}
template <class T>
static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift)
static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp,
int shift)
{
const char* tevScaleTableLeft[] = {
"", // SCALE_1
@ -1061,8 +1141,7 @@ static void WriteTevRegular(T& out, const char* components, int bias, int op, in
out.Write(")%s", tevScaleTableRight[shift]);
}
template <class T>
static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap,
static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap,
bool stereo, API_TYPE ApiType)
{
out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap);
@ -1094,8 +1173,7 @@ static const char* tevAlphaFunclogicTable[] = {
" == " // xnor
};
template <class T>
static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,
static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType,
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth)
{
static const char* alphaRef[2] = {I_ALPHA ".r", I_ALPHA ".g"};
@ -1107,10 +1185,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api
else
out.Write("\tif(!( ");
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
// Lookup the first component from the alpha function table
int compindex = uid_data->alpha_test_comp0;
out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]);
@ -1133,16 +1207,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api
out.Write("\t\tdepth = %s;\n", (ApiType == API_D3D) ? "0.0" : "1.0");
// ZCOMPLOC HACK:
// The only way to emulate alpha test + early-z is to force early-z in the shader.
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
// we are only able to choose which one we want to respect more.
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important that a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
!g_ActiveConfig.backend_info.bSupportsEarlyZ &&
!bpmem.genMode.zfreeze;
if (!uid_data->alpha_test_use_zcomploc_hack)
{
out.Write("\t\tdiscard;\n");
@ -1164,15 +1228,11 @@ static const char* tevFogFuncsTable[] = {
"\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2
};
template <class T>
static void WriteFog(T& out, pixel_shader_uid_data* uid_data)
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
{
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
if (uid_data->fog_fsel == 0)
return; // no Fog
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR);
out.SetConstantsUsed(C_FOGI, C_FOGI);
out.SetConstantsUsed(C_FOGF, C_FOGF + 1);
@ -1198,7 +1258,6 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data)
// ze *= x_adjust
// TODO Instead of this theoretical calculation, we should use the
// coefficient table given in the fog range BP registers!
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
if (uid_data->fog_RangeBaseEnabled)
{
out.SetConstantsUsed(C_FOGF, C_FOGF);
@ -1223,13 +1282,3 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data)
out.Write("\tint ifog = iround(fog * 256.0);\n");
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
}
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{
return GeneratePixelShader<PixelShaderUid>(dstAlphaMode, ApiType);
}
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{
return GeneratePixelShader<ShaderCode>(dstAlphaMode, ApiType);
}

View File

@ -87,7 +87,7 @@ struct pixel_shader_uid_data
}
}
inline u32 GetTevindirefCoord(int index)
inline u32 GetTevindirefCoord(int index) const
{
if (index == 0)
{
@ -108,7 +108,7 @@ struct pixel_shader_uid_data
return 0;
}
inline u32 GetTevindirefMap(int index)
inline u32 GetTevindirefMap(int index) const
{
if (index == 0)
{
@ -165,5 +165,6 @@ struct pixel_shader_uid_data
typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType);
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType);
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,
const pixel_shader_uid_data* uid_data);
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode);