From 89be1cbf511591dba1e0e33868a67ca393d687fc Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 9 May 2013 17:48:48 +0200 Subject: [PATCH 1/2] recreate "per pixel depth" option and renamed it to fast depth calculation --- Source/Core/DolphinWX/Src/VideoConfigDiag.cpp | 3 +- .../Core/VideoCommon/Src/PixelShaderGen.cpp | 56 ++++++++++--------- Source/Core/VideoCommon/Src/PixelShaderGen.h | 2 +- Source/Core/VideoCommon/Src/VideoConfig.cpp | 4 ++ Source/Core/VideoCommon/Src/VideoConfig.h | 1 + 5 files changed, 38 insertions(+), 28 deletions(-) diff --git a/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp b/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp index 8ad34bed08..531fbe2cc4 100644 --- a/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp +++ b/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp @@ -87,6 +87,7 @@ wxString aa_desc = wxTRANSLATE("Reduces the amount of aliasing caused by rasteri wxString scaled_efb_copy_desc = wxTRANSLATE("Greatly increases quality of textures generated using render to texture effects.\nRaising the internal resolution will improve the effect of this setting.\nSlightly decreases performance and possibly causes issues (although unlikely).\n\nIf unsure, leave this checked."); wxString pixel_lighting_desc = wxTRANSLATE("Calculate lighting of 3D graphics per-pixel rather than per vertex.\nDecreases emulation speed by some percent (depending on your GPU).\nThis usually is a safe enhancement, but might cause issues sometimes.\n\nIf unsure, leave this unchecked."); wxString hacked_buffer_upload_desc = wxTRANSLATE("Use a hacked upload strategy to stream vertices.\nThis usually speed up, but is forbidden by OpenGL specification and may causes heavy glitches.\n\nIf unsure, leave this unchecked."); +wxString fast_depth_calc_desc = wxTRANSLATE("Use the hardware perspective division to calculate the depth value.\nThis speed up the GPU a lot as the pixel shader only need to calculate the samples which pass the depth test.\nBut as this doesn't have to be accurate enough, flickering may happen on some gpus."); wxString force_filtering_desc = wxTRANSLATE("Force texture filtering even if the emulated game explicitly disabled it.\nImproves texture quality slightly but causes glitches in some games.\n\nIf unsure, leave this unchecked."); wxString _3d_vision_desc = wxTRANSLATE("Enable 3D effects via stereoscopy using Nvidia 3D Vision technology if it's supported by your GPU.\nPossibly causes issues.\nRequires fullscreen to work.\n\nIf unsure, leave this unchecked."); wxString internal_res_desc = wxTRANSLATE("Specifies the resolution used to render at. A high resolution will improve visual quality a lot but is also quite heavy on performance and might cause glitches in certain games.\n\"Multiple of 640x528\" is a bit slower than \"Window Size\" but yields less issues. Generally speaking, the lower the internal resolution is, the better your performance will be.\n\nIf unsure, select 640x528."); @@ -502,7 +503,7 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string &title, con szr_other->Add(CreateCheckBox(page_hacks, _("OpenCL Texture Decoder"), wxGetTranslation(opencl_desc), vconfig.bEnableOpenCL)); szr_other->Add(CreateCheckBox(page_hacks, _("OpenMP Texture Decoder"), wxGetTranslation(omp_desc), vconfig.bOMPDecoder)); szr_other->Add(hacked_buffer_upload_cb = CreateCheckBox(page_hacks, _("Hacked Buffer Upload"), wxGetTranslation(hacked_buffer_upload_desc), vconfig.bHackedBufferUpload)); - + szr_other->Add(CreateCheckBox(page_hacks, _("Fast Depth Calucation"), wxGetTranslation(fast_depth_calc_desc), vconfig.bFastDepthCalc)); if (Core::GetState() != Core::CORE_UNINITIALIZED) hacked_buffer_upload_cb->Disable(); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 9ab5752e88..c97cd889ea 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -95,22 +95,23 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 compo uid->values[0] |= bpmem.genMode.numtevstages; // 4 uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 uid->values[0] |= dstAlphaMode << 8; // 2 + uid->values[0] |= g_ActiveConfig.bFastDepthCalc << 10; // 1 bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - uid->values[0] |= enablePL << 10; // 1 + uid->values[0] |= enablePL << 11; // 1 if (!enablePL) { - uid->values[0] |= xfregs.numTexGen.numTexGens << 11; // 4 + uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4 } AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult(); - uid->values[0] |= alphaPreTest << 15; // 2 + uid->values[0] |= alphaPreTest << 16; // 2 // numtexgens should be <= 8 for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) { - uid->values[0] |= xfregs.texMtxInfo[i].projection << (17+i); // 1 + uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1 } uid->values[1] = bpmem.genMode.numindstages; // 3 @@ -180,8 +181,9 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u *ptr++ = bpmem.ztex2.hex; // 2 *ptr++ = bpmem.zcontrol.hex; // 3 *ptr++ = bpmem.zmode.hex; // 4 - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 5 - *ptr++ = xfregs.numTexGen.hex; // 6 + *ptr++ = g_ActiveConfig.bFastDepthCalc; // 5 + *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6 + *ptr++ = xfregs.numTexGen.hex; // 7 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { @@ -193,28 +195,28 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u } for (unsigned int i = 0; i < 8; ++i) - *ptr++ = xfregs.texMtxInfo[i].hex; // 7-14 + *ptr++ = xfregs.texMtxInfo[i].hex; // 8-15 for (unsigned int i = 0; i < 16; ++i) - *ptr++ = bpmem.tevind[i].hex; // 15-30 + *ptr++ = bpmem.tevind[i].hex; // 16-31 - *ptr++ = bpmem.tevindref.hex; // 31 + *ptr++ = bpmem.tevindref.hex; // 32 for (u32 i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times { - *ptr++ = bpmem.combiners[i].colorC.hex; // 32+5*i - *ptr++ = bpmem.combiners[i].alphaC.hex; // 33+5*i - *ptr++ = bpmem.tevind[i].hex; // 34+5*i - *ptr++ = bpmem.tevksel[i/2].hex; // 35+5*i - *ptr++ = bpmem.tevorders[i/2].hex; // 36+5*i + *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i + *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i + *ptr++ = bpmem.tevind[i].hex; // 35+5*i + *ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i + *ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i } - ptr = &uid->values[112]; + ptr = &uid->values[113]; - *ptr++ = bpmem.alpha_test.hex; // 112 + *ptr++ = bpmem.alpha_test.hex; // 113 - *ptr++ = bpmem.fog.c_proj_fsel.hex; // 113 - *ptr++ = bpmem.fogRange.Base.hex; // 114 + *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114 + *ptr++ = bpmem.fogRange.Base.hex; // 115 _assert_((ptr - uid->values) == uid->GetNumValues()); } @@ -518,7 +520,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType int numStages = bpmem.genMode.numtevstages + 1; int numTexgen = bpmem.genMode.numtexgens; - bool per_pixel_depth = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable; + bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; bool bOpenGL = ApiType == API_OPENGL; char *p = text; WRITE(p, "//Pixel Shader for TEV stages\n"); @@ -820,15 +822,18 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WriteAlphaTest(p, ApiType, dstAlphaMode, per_pixel_depth); - // the screen space depth value = far z + (clip z / clip w) * z range - if(ApiType == API_OPENGL || ApiType == API_D3D11) + // dx9 doesn't support readback of depth in pixel shader, so we always have to calculate it again + // shouldn't be a performance issue as early_z is still possible, but there may be depth issues with z-textures + if((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc) WRITE(p, "float zCoord = rawpos.z;\n"); else - // dx9 doesn't support 4 component position, so we have to calculate it again + // the screen space depth value = far z + (clip z / clip w) * z range WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; + + // Note: depth texture out put is only written to depth buffer if late depth test is used if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... @@ -839,11 +844,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n"); WRITE(p, "zCoord = %s(zCoord);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n"); - - // Note: depth texture out put is only written to depth buffer if late depth test is used - if (per_pixel_depth) - WRITE(p, "depth = zCoord;\n"); } + + if (per_pixel_depth) + WRITE(p, "depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index b93659c01c..d2a3046939 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -32,7 +32,7 @@ #define C_PMATERIALS (C_PLIGHTS + 40) #define C_PENVCONST_END (C_PMATERIALS + 4) #define PIXELSHADERUID_MAX_VALUES 70 -#define PIXELSHADERUID_MAX_VALUES_SAFE 115 +#define PIXELSHADERUID_MAX_VALUES_SAFE 116 // Annoying sure, can be removed once we get up to GLSL ~1.3 const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index 0b70b5440f..85fac2da19 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -64,6 +64,7 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle, 0); iniFile.Get("Settings", "EnablePixelLighting", &bEnablePixelLighting, 0); iniFile.Get("Settings", "HackedBufferUpload", &bHackedBufferUpload, 0); + iniFile.Get("Settings", "FastDepthCalc", &bFastDepthCalc, 0); iniFile.Get("Settings", "MSAA", &iMultisampleMode, 0); iniFile.Get("Settings", "EFBScale", &iEFBScale, (int) SCALE_1X); // native @@ -123,6 +124,7 @@ void VideoConfig::GameIniLoad(const char *ini_file) iniFile.GetIfExists("Video_Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle); iniFile.GetIfExists("Video_Settings", "EnablePixelLighting", &bEnablePixelLighting); iniFile.GetIfExists("Video_Settings", "HackedBufferUpload", &bHackedBufferUpload); + iniFile.GetIfExists("Video_Settings", "FastDepthCalc", &bFastDepthCalc); iniFile.GetIfExists("Video_Settings", "MSAA", &iMultisampleMode); int tmp = -9000; iniFile.GetIfExists("Video_Settings", "EFBScale", &tmp); // integral @@ -219,6 +221,7 @@ void VideoConfig::Save(const char *ini_file) iniFile.Set("Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle); iniFile.Set("Settings", "EnablePixelLighting", bEnablePixelLighting); iniFile.Set("Settings", "HackedBufferUpload", bHackedBufferUpload); + iniFile.Set("Settings", "FastDepthCalc", bFastDepthCalc); iniFile.Set("Settings", "ShowEFBCopyRegions", bShowEFBCopyRegions); iniFile.Set("Settings", "MSAA", iMultisampleMode); @@ -283,6 +286,7 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini) SET_IF_DIFFERS("Video_Settings", "AnaglyphStereoSeparation", iAnaglyphStereoSeparation); SET_IF_DIFFERS("Video_Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle); SET_IF_DIFFERS("Video_Settings", "EnablePixelLighting", bEnablePixelLighting); + SET_IF_DIFFERS("Video_Settings", "FastDepthCalc", bFastDepthCalc); SET_IF_DIFFERS("Video_Settings", "MSAA", iMultisampleMode); SET_IF_DIFFERS("Video_Settings", "EFBScale", iEFBScale); // integral SET_IF_DIFFERS("Video_Settings", "DstAlphaPass", bDstAlphaPass); diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 9b5076df8c..ec4ad3ed3d 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -123,6 +123,7 @@ struct VideoConfig bool bUseBBox; bool bEnablePixelLighting; bool bHackedBufferUpload; + bool bFastDepthCalc; int iLog; // CONF_ bits int iSaveTargetId; // TODO: Should be dropped From f7c3cacb5cff2d02ee74ed7602ba3aae4f3278ba Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 10 May 2013 12:51:06 +0200 Subject: [PATCH 2/2] ppd: fix small issues in my last commit --- Source/Core/DolphinWX/Src/VideoConfigDiag.cpp | 4 ++-- Source/Core/VideoCommon/Src/PixelShaderGen.cpp | 10 +++++++--- Source/Core/VideoCommon/Src/VideoConfig.cpp | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp b/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp index 531fbe2cc4..45bf35046e 100644 --- a/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp +++ b/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp @@ -87,7 +87,7 @@ wxString aa_desc = wxTRANSLATE("Reduces the amount of aliasing caused by rasteri wxString scaled_efb_copy_desc = wxTRANSLATE("Greatly increases quality of textures generated using render to texture effects.\nRaising the internal resolution will improve the effect of this setting.\nSlightly decreases performance and possibly causes issues (although unlikely).\n\nIf unsure, leave this checked."); wxString pixel_lighting_desc = wxTRANSLATE("Calculate lighting of 3D graphics per-pixel rather than per vertex.\nDecreases emulation speed by some percent (depending on your GPU).\nThis usually is a safe enhancement, but might cause issues sometimes.\n\nIf unsure, leave this unchecked."); wxString hacked_buffer_upload_desc = wxTRANSLATE("Use a hacked upload strategy to stream vertices.\nThis usually speed up, but is forbidden by OpenGL specification and may causes heavy glitches.\n\nIf unsure, leave this unchecked."); -wxString fast_depth_calc_desc = wxTRANSLATE("Use the hardware perspective division to calculate the depth value.\nThis speed up the GPU a lot as the pixel shader only need to calculate the samples which pass the depth test.\nBut as this doesn't have to be accurate enough, flickering may happen on some gpus."); +wxString fast_depth_calc_desc = wxTRANSLATE("Use a less accurate algorithm to calculate depth values.\nCauses issues in a few games but might give a decent speedup.\n\nIf unsure, leave this checked."); wxString force_filtering_desc = wxTRANSLATE("Force texture filtering even if the emulated game explicitly disabled it.\nImproves texture quality slightly but causes glitches in some games.\n\nIf unsure, leave this unchecked."); wxString _3d_vision_desc = wxTRANSLATE("Enable 3D effects via stereoscopy using Nvidia 3D Vision technology if it's supported by your GPU.\nPossibly causes issues.\nRequires fullscreen to work.\n\nIf unsure, leave this unchecked."); wxString internal_res_desc = wxTRANSLATE("Specifies the resolution used to render at. A high resolution will improve visual quality a lot but is also quite heavy on performance and might cause glitches in certain games.\n\"Multiple of 640x528\" is a bit slower than \"Window Size\" but yields less issues. Generally speaking, the lower the internal resolution is, the better your performance will be.\n\nIf unsure, select 640x528."); @@ -503,7 +503,7 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string &title, con szr_other->Add(CreateCheckBox(page_hacks, _("OpenCL Texture Decoder"), wxGetTranslation(opencl_desc), vconfig.bEnableOpenCL)); szr_other->Add(CreateCheckBox(page_hacks, _("OpenMP Texture Decoder"), wxGetTranslation(omp_desc), vconfig.bOMPDecoder)); szr_other->Add(hacked_buffer_upload_cb = CreateCheckBox(page_hacks, _("Hacked Buffer Upload"), wxGetTranslation(hacked_buffer_upload_desc), vconfig.bHackedBufferUpload)); - szr_other->Add(CreateCheckBox(page_hacks, _("Fast Depth Calucation"), wxGetTranslation(fast_depth_calc_desc), vconfig.bFastDepthCalc)); + szr_other->Add(CreateCheckBox(page_hacks, _("Fast Depth Calculation"), wxGetTranslation(fast_depth_calc_desc), vconfig.bFastDepthCalc)); if (Core::GetState() != Core::CORE_UNINITIALIZED) hacked_buffer_upload_cb->Disable(); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index c97cd889ea..9fd71c0e09 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -823,7 +823,8 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType // dx9 doesn't support readback of depth in pixel shader, so we always have to calculate it again - // shouldn't be a performance issue as early_z is still possible, but there may be depth issues with z-textures + // shouldn't be a performance issue as the written depth is usually still from perspective division + // but this isn't true for z-textures, so there will be depth issues between enabled and disabled z-textures fragments if((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc) WRITE(p, "float zCoord = rawpos.z;\n"); else @@ -833,7 +834,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; - // Note: depth texture out put is only written to depth buffer if late depth test is used + // Note: z-textures are not written to depth buffer if early depth test is used + if (per_pixel_depth && bpmem.zcontrol.early_ztest) + WRITE(p, "depth = zCoord;\n"); + if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... @@ -846,7 +850,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n"); } - if (per_pixel_depth) + if (per_pixel_depth && !bpmem.zcontrol.early_ztest) WRITE(p, "depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index 85fac2da19..6e1bc31e55 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -64,7 +64,7 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle, 0); iniFile.Get("Settings", "EnablePixelLighting", &bEnablePixelLighting, 0); iniFile.Get("Settings", "HackedBufferUpload", &bHackedBufferUpload, 0); - iniFile.Get("Settings", "FastDepthCalc", &bFastDepthCalc, 0); + iniFile.Get("Settings", "FastDepthCalc", &bFastDepthCalc, true); iniFile.Get("Settings", "MSAA", &iMultisampleMode, 0); iniFile.Get("Settings", "EFBScale", &iEFBScale, (int) SCALE_1X); // native