Implement zcomploc on OpenGL4.2+

This commit is contained in:
degasus 2013-07-22 12:02:16 +02:00
parent f693488c8a
commit 15b8ac64ef
9 changed files with 45 additions and 16 deletions

View File

@ -258,7 +258,8 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
unsigned int numStages = bpmem.genMode.numtevstages + 1;
unsigned int numTexgen = bpmem.genMode.numtexgens;
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc;
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.zcontrol.early_ztest && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || (!g_ActiveConfig.bFastDepthCalc && !forced_early_z);
out.Write("//Pixel Shader for TEV stages\n");
out.Write("//%i TEV stages, %i texgens, %i IND stages\n",
@ -372,6 +373,14 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
}
out.Write("float4 clipPos;\n");
}
if (forced_early_z)
{
// HACK: This doesn't force the driver to write to the depth buffer if the alpha test fails.
// It merely allows it, but it seems that all drivers do so.
out.Write("layout(early_fragment_tests) in;\n");
}
out.Write("void main()\n{\n");
}
else
@ -584,6 +593,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
uid_data.ztex_op = bpmem.ztex2.op;
uid_data.per_pixel_depth = per_pixel_depth;
uid_data.forced_early_z = forced_early_z;
uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
uid_data.early_ztest = bpmem.zcontrol.early_ztest;
uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel;
@ -1129,17 +1139,20 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE Api
out.Write("\t\tdepth = 1.f;\n");
// HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
// or after texturing and alpha test. PC GPUs have no way to support this
// feature properly as of 2012: depth buffer and depth test are not
// or after texturing and alpha test. PC graphics APIs have no way to support this
// feature properly as of 2012: Depth buffer and depth test are not
// programmable and the depth test is always done after texturing.
// Most importantly, PC GPUs do not allow writing to the z-buffer without
// Most importantly, they do not allow writing to the z-buffer without
// writing a color value (unless color writing is disabled altogether).
// We implement "depth test before texturing" by discarding the fragment
// when the alpha test fail. This is not a correct implementation because
// even if the depth test fails the fragment could be alpha blended, but
// we don't have a choice.
uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable;
if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable))
// We implement "depth test before texturing" by disabling the alpha test when early-z is in use.
// This seems to be less buggy than skipping the depth buffer update when the alpha test fails,
// but neither approach is accurate.
// OpenGL 4.2 provides the early_fragment_tests layout qualifier, which allows the driver to
// still update the depth buffer if the alpha test fails. The driver doesn't have to, but I
// assume they all do because it's the much faster code path for the GPU.
uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ;
if (!uid_data.alpha_test_use_zcomploc_hack)
{
out.Write("\t\tdiscard;\n");
if (ApiType != API_D3D11)

View File

@ -112,6 +112,7 @@ struct pixel_shader_uid_data
u32 fast_depth_calc : 1;
u32 per_pixel_depth : 1;
u32 forced_early_z : 1;
u32 early_ztest : 1;
u32 xfregs_numTexGen_numTexGens : 4;

View File

@ -155,7 +155,8 @@ struct VideoConfig
bool bSupportsPixelLighting;
bool bSupportsPrimitiveRestart;
bool bSupportsSeparateAlphaFunction;
bool bSupportsGLSLUBO; // needed by pixelShaderGen, so must stay in videoCommon
bool bSupportsGLSLUBO; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
} backend_info;
// Utility

View File

@ -90,6 +90,7 @@ void InitBackendInfo()
g_Config.backend_info.bSupportsFormatReinterpretation = true;
g_Config.backend_info.bSupportsPixelLighting = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsEarlyZ = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;

View File

@ -96,6 +96,7 @@ void InitBackendInfo()
g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsFormatReinterpretation = true;
g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;
g_Config.backend_info.bSupportsEarlyZ = false;
// adapters
g_Config.backend_info.Adapters.clear();

View File

@ -539,7 +539,7 @@ void ProgramShaderCache::CreateHeader ( void )
"%s\n"
"%s\n"
, v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : "140"
, v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : v==GLSL_140 ? "140" : "150"
, v==GLSLES3 ? "precision highp float;" : ""
, g_ActiveConfig.backend_info.bSupportsGLSLUBO && v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
, v==GLSL_120 ? "attribute" : "in"

View File

@ -342,6 +342,7 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsGLSLUBO = true;
g_Config.backend_info.bSupportsPrimitiveRestart = false;
g_Config.backend_info.bSupportsEarlyZ = false;
g_ogl_config.bSupportsGLSLCache = false; // XXX: Reenable once shaders compile correctly
g_ogl_config.bSupportsGLPinnedMemory = false;
@ -433,6 +434,7 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsDualSourceBlend = GLEW_ARB_blend_func_extended;
g_Config.backend_info.bSupportsGLSLUBO = GLEW_ARB_uniform_buffer_object;
g_Config.backend_info.bSupportsPrimitiveRestart = GLEW_VERSION_3_1 || GLEW_NV_primitive_restart;
g_Config.backend_info.bSupportsEarlyZ = GLEW_ARB_shader_image_load_store;
g_ogl_config.bSupportsGLSLCache = GLEW_ARB_get_program_binary;
g_ogl_config.bSupportsGLPinnedMemory = GLEW_AMD_pinned_memory;
@ -453,14 +455,21 @@ Renderer::Renderer()
{
g_ogl_config.eSupportedGLSLVersion = GLSL_120;
g_Config.backend_info.bSupportsDualSourceBlend = false; //TODO: implement dual source blend
g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+
}
else if(strstr(g_ogl_config.glsl_version, "1.30"))
{
g_ogl_config.eSupportedGLSLVersion = GLSL_130;
g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+
}
else if(strstr(g_ogl_config.glsl_version, "1.40"))
{
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+
}
else
{
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
g_ogl_config.eSupportedGLSLVersion = GLSL_150;
}
#endif
@ -489,10 +498,11 @@ Renderer::Renderer()
g_ogl_config.gl_renderer,
g_ogl_config.gl_version).c_str(), 5000);
WARN_LOG(VIDEO,"Missing OGL Extensions: %s%s%s%s%s%s%s%s%s",
WARN_LOG(VIDEO,"Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s",
g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "" : "DualSourceBlend ",
g_ActiveConfig.backend_info.bSupportsGLSLUBO ? "" : "UniformBuffer ",
g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ",
g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ",
g_ogl_config.bSupportsGLPinnedMemory ? "" : "PinnedMemory ",
g_ogl_config.bSupportsGLSLCache ? "" : "ShaderCache ",
g_ogl_config.bSupportsGLBaseVertex ? "" : "BaseVertex ",

View File

@ -12,7 +12,8 @@ void ClearEFBCache();
enum GLSL_VERSION {
GLSL_120,
GLSL_130,
GLSL_140, // and above
GLSL_140,
GLSL_150, // and above
GLSLES3
};

View File

@ -129,9 +129,10 @@ void InitBackendInfo()
g_Config.backend_info.bUseRGBATextures = true;
g_Config.backend_info.bUseMinimalMipCount = false;
g_Config.backend_info.bSupports3DVision = false;
//g_Config.backend_info.bSupportsDualSourceBlend = true; // is gpu depenend and must be set in renderer
//g_Config.backend_info.bSupportsDualSourceBlend = true; // is gpu dependent and must be set in renderer
g_Config.backend_info.bSupportsFormatReinterpretation = false;
g_Config.backend_info.bSupportsPixelLighting = true;
//g_Config.backend_info.bSupportsEarlyZ = true; // is gpu dependent and must be set in renderer
// aamodes
const char* caamodes[] = {_trans("None"), "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA", "4x SSAA"};