Implement zcomploc on OpenGL4.2+

This commit is contained in:
degasus 2013-07-22 12:02:16 +02:00
parent f693488c8a
commit 15b8ac64ef
9 changed files with 45 additions and 16 deletions

View File

@ -258,7 +258,8 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numStages = bpmem.genMode.numtevstages + 1;
unsigned int numTexgen = bpmem.genMode.numtexgens; unsigned int numTexgen = bpmem.genMode.numtexgens;
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.zcontrol.early_ztest && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || (!g_ActiveConfig.bFastDepthCalc && !forced_early_z);
out.Write("//Pixel Shader for TEV stages\n"); out.Write("//Pixel Shader for TEV stages\n");
out.Write("//%i TEV stages, %i texgens, %i IND stages\n", out.Write("//%i TEV stages, %i texgens, %i IND stages\n",
@ -372,6 +373,14 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
} }
out.Write("float4 clipPos;\n"); out.Write("float4 clipPos;\n");
} }
if (forced_early_z)
{
// HACK: This doesn't force the driver to write to the depth buffer if the alpha test fails.
// It merely allows it, but all drivers seem to do so.
out.Write("layout(early_fragment_tests) in;\n");
}
out.Write("void main()\n{\n"); out.Write("void main()\n{\n");
} }
else else
@ -584,6 +593,7 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
uid_data.ztex_op = bpmem.ztex2.op; uid_data.ztex_op = bpmem.ztex2.op;
uid_data.per_pixel_depth = per_pixel_depth; uid_data.per_pixel_depth = per_pixel_depth;
uid_data.forced_early_z = forced_early_z;
uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc; uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
uid_data.early_ztest = bpmem.zcontrol.early_ztest; uid_data.early_ztest = bpmem.zcontrol.early_ztest;
uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel;
@ -1129,17 +1139,20 @@ static void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE Api
out.Write("\t\tdepth = 1.f;\n"); out.Write("\t\tdepth = 1.f;\n");
// HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
// or after texturing and alpha test. PC GPUs have no way to support this // or after texturing and alpha test. PC graphics APIs have no way to support this
// feature properly as of 2012: depth buffer and depth test are not // feature properly as of 2012: Depth buffer and depth test are not
// programmable and the depth test is always done after texturing. // programmable and the depth test is always done after texturing.
// Most importantly, PC GPUs do not allow writing to the z-buffer without // Most importantly, they do not allow writing to the z-buffer without
// writing a color value (unless color writing is disabled altogether). // writing a color value (unless color writing is disabled altogether).
// We implement "depth test before texturing" by discarding the fragment // We implement "depth test before texturing" by disabling alpha test when early-z is in use.
// when the alpha test fail. This is not a correct implementation because // It seems to be less buggy than not to update the depth buffer if alpha test fails,
// even if the depth test fails the fragment could be alpha blended, but // but both ways wouldn't be accurate.
// we don't have a choice.
uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable; // OpenGL 4.2 has a flag which allows the driver to still update the depth buffer
if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable)) // if alpha test fails. The driver doesn't have to, but I assume they all do because
// it's the much faster code path for the GPU.
uid_data.alpha_test_use_zcomploc_hack = bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ;
if (!uid_data.alpha_test_use_zcomploc_hack)
{ {
out.Write("\t\tdiscard;\n"); out.Write("\t\tdiscard;\n");
if (ApiType != API_D3D11) if (ApiType != API_D3D11)

View File

@ -112,6 +112,7 @@ struct pixel_shader_uid_data
u32 fast_depth_calc : 1; u32 fast_depth_calc : 1;
u32 per_pixel_depth : 1; u32 per_pixel_depth : 1;
u32 forced_early_z : 1;
u32 early_ztest : 1; u32 early_ztest : 1;
u32 xfregs_numTexGen_numTexGens : 4; u32 xfregs_numTexGen_numTexGens : 4;

View File

@ -155,7 +155,8 @@ struct VideoConfig
bool bSupportsPixelLighting; bool bSupportsPixelLighting;
bool bSupportsPrimitiveRestart; bool bSupportsPrimitiveRestart;
bool bSupportsSeparateAlphaFunction; bool bSupportsSeparateAlphaFunction;
bool bSupportsGLSLUBO; // needed by pixelShaderGen, so must stay in videoCommon bool bSupportsGLSLUBO; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
} backend_info; } backend_info;
// Utility // Utility

View File

@ -90,6 +90,7 @@ void InitBackendInfo()
g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsFormatReinterpretation = true;
g_Config.backend_info.bSupportsPixelLighting = true; g_Config.backend_info.bSupportsPixelLighting = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsEarlyZ = false;
IDXGIFactory* factory; IDXGIFactory* factory;
IDXGIAdapter* ad; IDXGIAdapter* ad;

View File

@ -96,6 +96,7 @@ void InitBackendInfo()
g_Config.backend_info.bSupportsDualSourceBlend = false; g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsFormatReinterpretation = true;
g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants; g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;
g_Config.backend_info.bSupportsEarlyZ = false;
// adapters // adapters
g_Config.backend_info.Adapters.clear(); g_Config.backend_info.Adapters.clear();

View File

@ -539,7 +539,7 @@ void ProgramShaderCache::CreateHeader ( void )
"%s\n" "%s\n"
"%s\n" "%s\n"
, v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : "140" , v==GLSLES3 ? "300 es" : v==GLSL_120 ? "120" : v==GLSL_130 ? "130" : v==GLSL_140 ? "140" : "150"
, v==GLSLES3 ? "precision highp float;" : "" , v==GLSLES3 ? "precision highp float;" : ""
, g_ActiveConfig.backend_info.bSupportsGLSLUBO && v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "" , g_ActiveConfig.backend_info.bSupportsGLSLUBO && v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
, v==GLSL_120 ? "attribute" : "in" , v==GLSL_120 ? "attribute" : "in"

View File

@ -342,6 +342,7 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsDualSourceBlend = false; g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsGLSLUBO = true; g_Config.backend_info.bSupportsGLSLUBO = true;
g_Config.backend_info.bSupportsPrimitiveRestart = false; g_Config.backend_info.bSupportsPrimitiveRestart = false;
g_Config.backend_info.bSupportsEarlyZ = false;
g_ogl_config.bSupportsGLSLCache = false; // XXX: Reenable once shaders compile correctly g_ogl_config.bSupportsGLSLCache = false; // XXX: Reenable once shaders compile correctly
g_ogl_config.bSupportsGLPinnedMemory = false; g_ogl_config.bSupportsGLPinnedMemory = false;
@ -433,6 +434,7 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsDualSourceBlend = GLEW_ARB_blend_func_extended; g_Config.backend_info.bSupportsDualSourceBlend = GLEW_ARB_blend_func_extended;
g_Config.backend_info.bSupportsGLSLUBO = GLEW_ARB_uniform_buffer_object; g_Config.backend_info.bSupportsGLSLUBO = GLEW_ARB_uniform_buffer_object;
g_Config.backend_info.bSupportsPrimitiveRestart = GLEW_VERSION_3_1 || GLEW_NV_primitive_restart; g_Config.backend_info.bSupportsPrimitiveRestart = GLEW_VERSION_3_1 || GLEW_NV_primitive_restart;
g_Config.backend_info.bSupportsEarlyZ = GLEW_ARB_shader_image_load_store;
g_ogl_config.bSupportsGLSLCache = GLEW_ARB_get_program_binary; g_ogl_config.bSupportsGLSLCache = GLEW_ARB_get_program_binary;
g_ogl_config.bSupportsGLPinnedMemory = GLEW_AMD_pinned_memory; g_ogl_config.bSupportsGLPinnedMemory = GLEW_AMD_pinned_memory;
@ -453,14 +455,21 @@ Renderer::Renderer()
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_120; g_ogl_config.eSupportedGLSLVersion = GLSL_120;
g_Config.backend_info.bSupportsDualSourceBlend = false; //TODO: implement dual source blend g_Config.backend_info.bSupportsDualSourceBlend = false; //TODO: implement dual source blend
g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+
} }
else if(strstr(g_ogl_config.glsl_version, "1.30")) else if(strstr(g_ogl_config.glsl_version, "1.30"))
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_130; g_ogl_config.eSupportedGLSLVersion = GLSL_130;
g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+
}
else if(strstr(g_ogl_config.glsl_version, "1.40"))
{
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
g_Config.backend_info.bSupportsEarlyZ = false; // layout keyword is only supported on glsl150+
} }
else else
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_140; g_ogl_config.eSupportedGLSLVersion = GLSL_150;
} }
#endif #endif
@ -489,10 +498,11 @@ Renderer::Renderer()
g_ogl_config.gl_renderer, g_ogl_config.gl_renderer,
g_ogl_config.gl_version).c_str(), 5000); g_ogl_config.gl_version).c_str(), 5000);
WARN_LOG(VIDEO,"Missing OGL Extensions: %s%s%s%s%s%s%s%s%s", WARN_LOG(VIDEO,"Missing OGL Extensions: %s%s%s%s%s%s%s%s%s%s",
g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "" : "DualSourceBlend ", g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "" : "DualSourceBlend ",
g_ActiveConfig.backend_info.bSupportsGLSLUBO ? "" : "UniformBuffer ", g_ActiveConfig.backend_info.bSupportsGLSLUBO ? "" : "UniformBuffer ",
g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ", g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ",
g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ",
g_ogl_config.bSupportsGLPinnedMemory ? "" : "PinnedMemory ", g_ogl_config.bSupportsGLPinnedMemory ? "" : "PinnedMemory ",
g_ogl_config.bSupportsGLSLCache ? "" : "ShaderCache ", g_ogl_config.bSupportsGLSLCache ? "" : "ShaderCache ",
g_ogl_config.bSupportsGLBaseVertex ? "" : "BaseVertex ", g_ogl_config.bSupportsGLBaseVertex ? "" : "BaseVertex ",

View File

@ -12,7 +12,8 @@ void ClearEFBCache();
enum GLSL_VERSION { enum GLSL_VERSION {
GLSL_120, GLSL_120,
GLSL_130, GLSL_130,
GLSL_140, // and above GLSL_140,
GLSL_150, // and above
GLSLES3 GLSLES3
}; };

View File

@ -129,9 +129,10 @@ void InitBackendInfo()
g_Config.backend_info.bUseRGBATextures = true; g_Config.backend_info.bUseRGBATextures = true;
g_Config.backend_info.bUseMinimalMipCount = false; g_Config.backend_info.bUseMinimalMipCount = false;
g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bSupports3DVision = false;
//g_Config.backend_info.bSupportsDualSourceBlend = true; // is gpu depenend and must be set in renderer //g_Config.backend_info.bSupportsDualSourceBlend = true; // is gpu dependent and must be set in renderer
g_Config.backend_info.bSupportsFormatReinterpretation = false; g_Config.backend_info.bSupportsFormatReinterpretation = false;
g_Config.backend_info.bSupportsPixelLighting = true; g_Config.backend_info.bSupportsPixelLighting = true;
//g_Config.backend_info.bSupportsEarlyZ = true; // is gpu dependent and must be set in renderer
// aamodes // aamodes
const char* caamodes[] = {_trans("None"), "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA", "4x SSAA"}; const char* caamodes[] = {_trans("None"), "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA", "4x SSAA"};