Multithreaded Shadergen: Second pass over Pixel Shadergen.

Note: It's not 100% perfect, as some of the GPU capabilities leak into the
pixel shader UID.

Currently our UIDs don't get exported, so there is no issue. But someone
might want to fix this in the future.
This commit is contained in:
Scott Mansell 2016-01-17 01:41:26 +13:00
parent 1a831cfc7d
commit 24e5d21780
6 changed files with 238 additions and 188 deletions

View File

@ -556,10 +556,10 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode) bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode)
{ {
PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode, API_D3D); PixelShaderUid uid = GetPixelShaderUid(dstAlphaMode);
if (g_ActiveConfig.bEnableShaderDebugging) if (g_ActiveConfig.bEnableShaderDebugging)
{ {
ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData());
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p"); pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
} }
@ -588,7 +588,7 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode)
} }
// Need to compile a new shader // Need to compile a new shader
ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D); ShaderCode code = GeneratePixelShaderCode(dstAlphaMode, API_D3D, uid.GetUidData());
D3DBlob* pbytecode; D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode)) if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))

View File

@ -163,7 +163,7 @@ void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 g
SetCurrentPrimitiveTopology(gs_primitive_type); SetCurrentPrimitiveTopology(gs_primitive_type);
GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type); GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type);
PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode);
VertexShaderUid vs_uid = GetVertexShaderUid(); VertexShaderUid vs_uid = GetVertexShaderUid();
bool gs_changed = gs_uid != s_last_geometry_shader_uid; bool gs_changed = gs_uid != s_last_geometry_shader_uid;
@ -263,7 +263,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_
if (g_ActiveConfig.bEnableShaderDebugging) if (g_ActiveConfig.bEnableShaderDebugging)
{ {
ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData());
s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p"); s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p");
} }
@ -275,7 +275,7 @@ void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_
} }
else else
{ {
ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D, ps_uid.GetUidData());
ID3DBlob* ps_bytecode = nullptr; ID3DBlob* ps_bytecode = nullptr;
if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode)) if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode))

View File

@ -81,12 +81,12 @@ protected:
PixelShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, PixelShaderUid GetUid(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type,
API_TYPE api_type) override API_TYPE api_type) override
{ {
return GetPixelShaderUid(dst_alpha_mode, api_type); return GetPixelShaderUid(dst_alpha_mode);
} }
ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type, ShaderCode GenerateCode(DSTALPHA_MODE dst_alpha_mode, u32 primitive_type, API_TYPE api_type,
PixelShaderUid) override PixelShaderUid uid) override
{ {
return GeneratePixelShaderCode(dst_alpha_mode, api_type); return GeneratePixelShaderCode(dst_alpha_mode, api_type, uid.GetUidData());
} }
}; };

View File

@ -210,7 +210,7 @@ SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 primitive_
newentry.in_cache = 0; newentry.in_cache = 0;
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData()); ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid.vuid.GetUidData());
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid.puid.GetUidData());
ShaderCode gcode; ShaderCode gcode;
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && if (g_ActiveConfig.backend_info.bSupportsGeometryShaders &&
!uid.guid.GetUidData()->IsPassthrough()) !uid.guid.GetUidData()->IsPassthrough())
@ -397,13 +397,13 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type) void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 primitive_type)
{ {
uid->puid = GetPixelShaderUid(dstAlphaMode, API_OPENGL); uid->puid = GetPixelShaderUid(dstAlphaMode);
uid->vuid = GetVertexShaderUid(); uid->vuid = GetVertexShaderUid();
uid->guid = GetGeometryShaderUid(primitive_type); uid->guid = GetGeometryShaderUid(primitive_type);
if (g_ActiveConfig.bEnableShaderDebugging) if (g_ActiveConfig.bEnableShaderDebugging)
{ {
ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL); ShaderCode pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, uid->puid.GetUidData());
pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p"); pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");
ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData()); ShaderCode vcode = GenerateVertexShaderCode(API_OPENGL, uid->vuid.GetUidData());

View File

@ -151,28 +151,14 @@ static const char* tevRasTable[] = {
static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb"}; static const char* tevCOutputTable[] = {"prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb"};
static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"}; static const char* tevAOutputTable[] = {"prev.a", "c0.a", "c1.a", "c2.a"};
template <class T> // FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support)
static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType); // leak
template <class T> // into this UID; This is really unhelpful if these UIDs ever move from one machine to
static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift); // another.
template <class T> PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode)
static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap,
bool stereo, API_TYPE ApiType);
template <class T>
static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
template <class T>
static void WriteFog(T& out, pixel_shader_uid_data* uid_data);
template <class T>
static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{ {
T out; PixelShaderUid out;
// Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data* uid_data = out.GetUidData<pixel_shader_uid_data>();
pixel_shader_uid_data dummy_data;
pixel_shader_uid_data* uid_data = out.template GetUidData<pixel_shader_uid_data>();
if (uid_data == nullptr)
uid_data = &dummy_data;
memset(uid_data, 0, sizeof(*uid_data)); memset(uid_data, 0, sizeof(*uid_data));
uid_data->dstAlphaMode = dstAlphaMode; uid_data->dstAlphaMode = dstAlphaMode;
@ -185,6 +171,186 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
u32 numStages = uid_data->genMode_numtevstages + 1; u32 numStages = uid_data->genMode_numtevstages + 1;
const bool forced_early_z =
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() &&
(g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
const bool per_pixel_depth =
(bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) ||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->per_pixel_depth = per_pixel_depth;
uid_data->forced_early_z = forced_early_z;
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() &&
(!uid_data->fast_depth_calc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED))
{
static bool warn_once = true;
if (warn_once)
WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current "
"configuration. Make sure to enable fast depth calculations. If this message "
"still shows up your hardware isn't able to emulate the feature properly (a "
"GPU with D3D 11.0 / OGL 4.2 support is required).");
warn_once = false;
}
if (uid_data->per_pixel_lighting)
{
// The lighting shader only needs the two color bits of the 23bit component bit array.
uid_data->components =
(VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT;
;
GetLightingShaderUid(uid_data->lighting);
}
if (uid_data->genMode_numtexgens > 0)
{
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
{
// optional perspective divides
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
}
}
// indirect texture map lookup
int nIndirectStagesUsed = 0;
if (uid_data->genMode_numindstages > 0)
{
for (unsigned int i = 0; i < numStages; ++i)
{
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages)
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)
{
if (uid_data->nIndirectStagesUsed & (1 << i))
uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i));
}
for (unsigned int n = 0; n < numStages; n++)
{
int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
// HACK to handle cases where the tex gen is not enabled
if (!bHasTexCoord)
texcoord = bpmem.genMode.numtexgens;
uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
uid_data->stagehash[n].tevorders_texcoord = texcoord;
if (uid_data->stagehash[n].hasindstage)
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC;
uid_data->stagehash[n].cc = cc.hex & 0xFFFFFF;
uid_data->stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later
if (cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA ||
cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC ||
cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA ||
ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
const int i = bpmem.combiners[n].alphaC.rswap;
uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);
}
uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
if (uid_data->stagehash[n].tevorders_enable)
{
const int i = bpmem.combiners[n].alphaC.tswap;
uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);
}
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST ||
cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST ||
ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1);
uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1);
}
}
#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
uid_data->num_values = (uid_data->per_pixel_lighting) ?
sizeof(*uid_data) :
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
uid_data->Pretest = Pretest;
uid_data->late_ztest = bpmem.UseLateDepthTest();
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
// (in this case we need to write a depth value if depth test passes regardless of the alpha
// testing result)
if (uid_data->Pretest == AlphaTest::UNDETERMINED ||
(uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest))
{
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
// ZCOMPLOC HACK:
// The only way to emulate alpha test + early-z is to force early-z in the shader.
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
// we are only able to choose which one we want to respect more.
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important that a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
uid_data->alpha_test_use_zcomploc_hack =
bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
!g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze;
}
uid_data->zfreeze = bpmem.genMode.zfreeze;
uid_data->ztex_op = bpmem.ztex2.op;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
}
return out;
}
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
API_TYPE ApiType);
static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp,
int shift);
static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap,
bool stereo, API_TYPE ApiType);
static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType,
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,
const pixel_shader_uid_data* uid_data)
{
ShaderCode out;
u32 numStages = uid_data->genMode_numtevstages + 1;
out.Write("//Pixel Shader for TEV stages\n"); out.Write("//Pixel Shader for TEV stages\n");
out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens, out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens,
uid_data->genMode_numindstages); uid_data->genMode_numindstages);
@ -281,27 +447,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
} }
out.Write("struct VS_OUTPUT {\n"); out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers<T>(out, ApiType, uid_data->genMode_numtexgens, GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting,
uid_data->per_pixel_lighting, ""); "");
out.Write("};\n"); out.Write("};\n");
{
const bool forced_early_z =
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() &&
(g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
const bool per_pixel_depth =
(bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) ||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
uid_data->per_pixel_depth = per_pixel_depth;
uid_data->forced_early_z = forced_early_z;
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
}
if (uid_data->forced_early_z) if (uid_data->forced_early_z)
{ {
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before // Zcomploc (aka early_ztest) is a way to control whether depth test is done before
@ -360,9 +509,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
warn_once = false; warn_once = false;
} }
uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
if (ApiType == API_OPENGL) if (ApiType == API_OPENGL)
{ {
out.Write("out vec4 ocol0;\n"); out.Write("out vec4 ocol0;\n");
@ -375,7 +521,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{ {
out.Write("in VertexData {\n"); out.Write("in VertexData {\n");
GenerateVSOutputMembers<T>( GenerateVSOutputMembers(
out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting,
GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true));
@ -478,20 +624,13 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
"\tfloat3 ldir, h, cosAttn, distAttn;\n" "\tfloat3 ldir, h, cosAttn, distAttn;\n"
"\tfloat dist, dist2, attn;\n"); "\tfloat dist, dist2, attn;\n");
// The lighting shader only needs the two color bits of the 23bit component bit array.
uid_data->components =
(VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT;
;
// TODO: Our current constant usage code isn't able to handle more than one buffer. // TODO: Our current constant usage code isn't able to handle more than one buffer.
// So we can't mark the VS constant as used here. But keep them here as reference. // So we can't mark the VS constant as used here. But keep them here as reference.
// out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
// out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
// out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
// FIXME: Disabled until pixelshadergen is split "colors_", "col");
// GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
// "colors_", "col");
} }
// HACK to handle cases where the tex gen is not enabled // HACK to handle cases where the tex gen is not enabled
@ -506,7 +645,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{ {
out.Write("\tint2 fixpoint_uv%d = itrunc(", i); out.Write("\tint2 fixpoint_uv%d = itrunc(", i);
// optional perspective divides // optional perspective divides
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ) if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
{ {
out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i); out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i);
@ -520,24 +658,10 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
} }
} }
// indirect texture map lookup
int nIndirectStagesUsed = 0;
if (uid_data->genMode_numindstages > 0)
{
for (unsigned int i = 0; i < numStages; ++i)
{
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages)
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
}
uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
for (u32 i = 0; i < uid_data->genMode_numindstages; ++i) for (u32 i = 0; i < uid_data->genMode_numindstages; ++i)
{ {
if (uid_data->nIndirectStagesUsed & (1 << i)) if (uid_data->nIndirectStagesUsed & (1 << i))
{ {
uid_data->SetTevindrefValues(i, bpmem.tevindref.getTexCoord(i), bpmem.tevindref.getTexMap(i));
unsigned int texcoord = uid_data->GetTevindirefCoord(i); unsigned int texcoord = uid_data->GetTevindirefCoord(i);
unsigned int texmap = uid_data->GetTevindirefMap(i); unsigned int texmap = uid_data->GetTevindirefMap(i);
@ -551,17 +675,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
out.Write("\ttempcoord = int2(0, 0);\n"); out.Write("\ttempcoord = int2(0, 0);\n");
out.Write("\tint3 iindtex%d = ", i); out.Write("\tint3 iindtex%d = ", i);
SampleTexture<T>(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType); SampleTexture(out, "float2(tempcoord)", "abg", texmap, uid_data->stereo, ApiType);
} }
} }
for (unsigned int i = 0; i < numStages; i++) for (unsigned int i = 0; i < numStages; i++)
WriteStage<T>(out, uid_data, i, ApiType); // build the equation for this stage WriteStage(out, uid_data, i, ApiType); // build the equation for this stage
#define MY_STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
uid_data->num_values = (uid_data->per_pixel_lighting) ?
sizeof(*uid_data) :
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
{ {
// The results of the last texenv stage are put onto the screen, // The results of the last texenv stage are put onto the screen,
@ -581,18 +700,12 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
} }
out.Write("\tprev = prev & 255;\n"); out.Write("\tprev = prev & 255;\n");
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
uid_data->Pretest = Pretest;
uid_data->late_ztest = bpmem.UseLateDepthTest();
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
// (in this case we need to write a depth value if depth test passes regardless of the alpha // (in this case we need to write a depth value if depth test passes regardless of the alpha
// testing result) // testing result)
if (uid_data->Pretest == AlphaTest::UNDETERMINED || if (uid_data->Pretest == AlphaTest::UNDETERMINED ||
(uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest)) (uid_data->Pretest == AlphaTest::FAIL && uid_data->late_ztest))
WriteAlphaTest<T>(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth); WriteAlphaTest(out, uid_data, ApiType, dstAlphaMode, uid_data->per_pixel_depth);
uid_data->zfreeze = bpmem.genMode.zfreeze;
if (uid_data->zfreeze) if (uid_data->zfreeze)
{ {
@ -629,10 +742,6 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
} }
out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n"); out.Write("\tzCoord = clamp(zCoord, 0, 0xFFFFFF);\n");
uid_data->ztex_op = bpmem.ztex2.op;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
// depth texture can safely be ignored if the result won't be written to the depth buffer // depth texture can safely be ignored if the result won't be written to the depth buffer
// (early_ztest) and isn't used for fog either // (early_ztest) and isn't used for fog either
const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel; const bool skip_ztexture = !uid_data->per_pixel_depth && !uid_data->fog_fsel;
@ -674,7 +783,7 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
} }
else else
{ {
WriteFog<T>(out, uid_data); WriteFog(out, uid_data);
out.Write("\tocol0 = float4(prev) / 255.0;\n"); out.Write("\tocol0 = float4(prev) / 255.0;\n");
} }
@ -704,25 +813,20 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
return out; return out;
} }
template <class T> static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE ApiType) API_TYPE ApiType)
{ {
int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); auto& stage = uid_data->stagehash[n];
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; out.Write("\n\t// TEV stage %d\n", n);
bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
// HACK to handle cases where the tex gen is not enabled // HACK to handle cases where the tex gen is not enabled
u32 texcoord = stage.tevorders_texcoord;
bool bHasTexCoord = texcoord < uid_data->genMode_numtexgens;
if (!bHasTexCoord) if (!bHasTexCoord)
texcoord = 0; texcoord = 0;
out.Write("\n\t// TEV stage %d\n", n);
auto& stage = uid_data->stagehash[n];
uid_data->stagehash[n].hasindstage = bHasIndStage;
uid_data->stagehash[n].tevorders_texcoord = texcoord;
if (stage.hasindstage) if (stage.hasindstage)
{ {
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
TevStageIndirect tevind; TevStageIndirect tevind;
tevind.hex = stage.tevind; tevind.hex = stage.tevind;
@ -785,7 +889,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n,
stage.tevorders_texcoord, n); texcoord, n);
out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n",
mtxidx, n, mtxidx); mtxidx, n, mtxidx);
@ -798,7 +902,7 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n,
stage.tevorders_texcoord, n); texcoord, n);
out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n", out.Write("\tif (" I_INDTEXMTX "[%d].w >= 0) indtevtrans%d >>= " I_INDTEXMTX "[%d].w;\n",
mtxidx, n, mtxidx); mtxidx, n, mtxidx);
@ -823,20 +927,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
// wrap S // wrap S
if (tevind.sw == ITW_OFF) if (tevind.sw == ITW_OFF)
out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", stage.tevorders_texcoord); out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", texcoord);
else if (tevind.sw == ITW_0) else if (tevind.sw == ITW_0)
out.Write("\twrappedcoord.x = 0;\n"); out.Write("\twrappedcoord.x = 0;\n");
else else
out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", stage.tevorders_texcoord, out.Write("\twrappedcoord.x = fixpoint_uv%d.x & (%s - 1);\n", texcoord,
tevIndWrapStart[tevind.sw]); tevIndWrapStart[tevind.sw]);
// wrap T // wrap T
if (tevind.tw == ITW_OFF) if (tevind.tw == ITW_OFF)
out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", stage.tevorders_texcoord); out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord);
else if (tevind.tw == ITW_0) else if (tevind.tw == ITW_0)
out.Write("\twrappedcoord.y = 0;\n"); out.Write("\twrappedcoord.y = 0;\n");
else else
out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", stage.tevorders_texcoord, out.Write("\twrappedcoord.y = fixpoint_uv%d.y & (%s - 1);\n", texcoord,
tevIndWrapStart[tevind.tw]); tevIndWrapStart[tevind.tw]);
if (tevind.fb_addprev) // add previous tevcoord if (tevind.fb_addprev) // add previous tevcoord
@ -848,10 +952,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
} }
uid_data->stagehash[n].cc = bpmem.combiners[n].colorC.hex & 0xFFFFFF;
uid_data->stagehash[n].ac =
bpmem.combiners[n].alphaC.hex & 0xFFFFF0; // Storing rswap and tswap later
TevStageCombiner::ColorCombiner cc; TevStageCombiner::ColorCombiner cc;
TevStageCombiner::AlphaCombiner ac; TevStageCombiner::AlphaCombiner ac;
cc.hex = stage.cc; cc.hex = stage.cc;
@ -862,35 +962,20 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC || ac.a == TEVALPHAARG_RASA ||
ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA) ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{ {
const int i = bpmem.combiners[n].alphaC.rswap; // Generate swizzle string to represent the Ras color channel swapping
uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap;
ac.rswap = bpmem.combiners[n].alphaC.rswap;
uid_data->stagehash[n].tevksel_swap1a = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2a = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1b = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2b = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);
char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a], char rasswap[5] = {"rgba"[stage.tevksel_swap1a], "rgba"[stage.tevksel_swap2a],
"rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'}; "rgba"[stage.tevksel_swap1b], "rgba"[stage.tevksel_swap2b], '\0'};
out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap); out.Write("\trastemp = %s.%s;\n", tevRasTable[stage.tevorders_colorchan], rasswap);
} }
uid_data->stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
if (stage.tevorders_enable) if (stage.tevorders_enable)
{ {
int texmap = bpmem.tevorders[n / 2].getTexMap(n & 1); // Generate swizzle string to represent the texture color channel swapping
const int i = bpmem.combiners[n].alphaC.tswap; char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c],
uid_data->stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2; "rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'};
ac.tswap = bpmem.combiners[n].alphaC.tswap;
uid_data->stagehash[n].tevksel_swap1c = bpmem.tevksel[i * 2].swap1;
uid_data->stagehash[n].tevksel_swap2c = bpmem.tevksel[i * 2].swap2;
uid_data->stagehash[n].tevksel_swap1d = bpmem.tevksel[i * 2 + 1].swap1;
uid_data->stagehash[n].tevksel_swap2d = bpmem.tevksel[i * 2 + 1].swap2;
uid_data->stagehash[n].tevorders_texmap = bpmem.tevorders[n / 2].getTexMap(n & 1);
if (!bHasIndStage) if (!stage.hasindstage)
{ {
// calc tevcord // calc tevcord
if (bHasTexCoord) if (bHasTexCoord)
@ -898,12 +983,9 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
else else
out.Write("\ttevcoord.xy = int2(0, 0);\n"); out.Write("\ttevcoord.xy = int2(0, 0);\n");
} }
char texswap[5] = {"rgba"[stage.tevksel_swap1c], "rgba"[stage.tevksel_swap2c],
"rgba"[stage.tevksel_swap1d], "rgba"[stage.tevksel_swap2d], '\0'};
out.Write("\ttextemp = "); out.Write("\ttextemp = ");
SampleTexture<T>(out, "float2(tevcoord.xy)", texswap, texmap, uid_data->stereo, ApiType); SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, uid_data->stereo,
ApiType);
} }
else else
{ {
@ -914,8 +996,6 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || cc.d == TEVCOLORARG_KONST || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST ||
ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST) ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{ {
uid_data->stagehash[n].tevksel_kc = bpmem.tevksel[n / 2].getKC(n & 1);
uid_data->stagehash[n].tevksel_ka = bpmem.tevksel[n / 2].getKA(n & 1);
out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc], out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[stage.tevksel_kc],
tevKSelTableA[stage.tevksel_ka]); tevKSelTableA[stage.tevksel_ka]);
@ -1014,8 +1094,8 @@ static void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, API_TYPE
out.Write(";\n"); out.Write(";\n");
} }
template <class T> static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp,
static void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift) int shift)
{ {
const char* tevScaleTableLeft[] = { const char* tevScaleTableLeft[] = {
"", // SCALE_1 "", // SCALE_1
@ -1061,8 +1141,7 @@ static void WriteTevRegular(T& out, const char* components, int bias, int op, in
out.Write(")%s", tevScaleTableRight[shift]); out.Write(")%s", tevScaleTableRight[shift]);
} }
template <class T> static void SampleTexture(ShaderCode& out, const char* texcoords, const char* texswap, int texmap,
static void SampleTexture(T& out, const char* texcoords, const char* texswap, int texmap,
bool stereo, API_TYPE ApiType) bool stereo, API_TYPE ApiType)
{ {
out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap);
@ -1094,8 +1173,7 @@ static const char* tevAlphaFunclogicTable[] = {
" == " // xnor " == " // xnor
}; };
template <class T> static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, API_TYPE ApiType,
static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,
DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth) DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth)
{ {
static const char* alphaRef[2] = {I_ALPHA ".r", I_ALPHA ".g"}; static const char* alphaRef[2] = {I_ALPHA ".r", I_ALPHA ".g"};
@ -1107,10 +1185,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api
else else
out.Write("\tif(!( "); out.Write("\tif(!( ");
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
// Lookup the first component from the alpha function table // Lookup the first component from the alpha function table
int compindex = uid_data->alpha_test_comp0; int compindex = uid_data->alpha_test_comp0;
out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]);
@ -1133,16 +1207,6 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE Api
out.Write("\t\tdepth = %s;\n", (ApiType == API_D3D) ? "0.0" : "1.0"); out.Write("\t\tdepth = %s;\n", (ApiType == API_D3D) ? "0.0" : "1.0");
// ZCOMPLOC HACK: // ZCOMPLOC HACK:
// The only way to emulate alpha test + early-z is to force early-z in the shader.
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
// we are only able to choose which one we want to respect more.
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important than a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
!g_ActiveConfig.backend_info.bSupportsEarlyZ &&
!bpmem.genMode.zfreeze;
if (!uid_data->alpha_test_use_zcomploc_hack) if (!uid_data->alpha_test_use_zcomploc_hack)
{ {
out.Write("\t\tdiscard;\n"); out.Write("\t\tdiscard;\n");
@ -1164,15 +1228,11 @@ static const char* tevFogFuncsTable[] = {
"\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2 "\tfog = 1.0 - fog;\n fog = exp2(-8.0 * fog * fog);\n" // backward exp2
}; };
template <class T> static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data)
static void WriteFog(T& out, pixel_shader_uid_data* uid_data)
{ {
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
if (uid_data->fog_fsel == 0) if (uid_data->fog_fsel == 0)
return; // no Fog return; // no Fog
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR);
out.SetConstantsUsed(C_FOGI, C_FOGI); out.SetConstantsUsed(C_FOGI, C_FOGI);
out.SetConstantsUsed(C_FOGF, C_FOGF + 1); out.SetConstantsUsed(C_FOGF, C_FOGF + 1);
@ -1198,7 +1258,6 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data)
// ze *= x_adjust // ze *= x_adjust
// TODO Instead of this theoretical calculation, we should use the // TODO Instead of this theoretical calculation, we should use the
// coefficient table given in the fog range BP registers! // coefficient table given in the fog range BP registers!
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
if (uid_data->fog_RangeBaseEnabled) if (uid_data->fog_RangeBaseEnabled)
{ {
out.SetConstantsUsed(C_FOGF, C_FOGF); out.SetConstantsUsed(C_FOGF, C_FOGF);
@ -1223,13 +1282,3 @@ static void WriteFog(T& out, pixel_shader_uid_data* uid_data)
out.Write("\tint ifog = iround(fog * 256.0);\n"); out.Write("\tint ifog = iround(fog * 256.0);\n");
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n"); out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR ".rgb * ifog) >> 8;\n");
} }
// Produces the pixel shader UID for the given destination-alpha mode and API
// type. This is a thin forwarder: it instantiates the shared
// GeneratePixelShader template with PixelShaderUid as the output type and
// passes both arguments through unchanged.
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{
  PixelShaderUid uid = GeneratePixelShader<PixelShaderUid>(dstAlphaMode, ApiType);
  return uid;
}
// Produces the pixel shader source for the given destination-alpha mode and
// API type. This is a thin forwarder: it instantiates the shared
// GeneratePixelShader template with ShaderCode as the output type and passes
// both arguments through unchanged.
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType)
{
  ShaderCode code = GeneratePixelShader<ShaderCode>(dstAlphaMode, ApiType);
  return code;
}

View File

@ -87,7 +87,7 @@ struct pixel_shader_uid_data
} }
} }
inline u32 GetTevindirefCoord(int index) inline u32 GetTevindirefCoord(int index) const
{ {
if (index == 0) if (index == 0)
{ {
@ -108,7 +108,7 @@ struct pixel_shader_uid_data
return 0; return 0;
} }
inline u32 GetTevindirefMap(int index) inline u32 GetTevindirefMap(int index) const
{ {
if (index == 0) if (index == 0)
{ {
@ -165,5 +165,6 @@ struct pixel_shader_uid_data
typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid; typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType,
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType); const pixel_shader_uid_data* uid_data);
PixelShaderUid GetPixelShaderUid(DSTALPHA_MODE dstAlphaMode);