diff --git a/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp b/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp index 8ad34bed08..45bf35046e 100644 --- a/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp +++ b/Source/Core/DolphinWX/Src/VideoConfigDiag.cpp @@ -87,6 +87,7 @@ wxString aa_desc = wxTRANSLATE("Reduces the amount of aliasing caused by rasteri wxString scaled_efb_copy_desc = wxTRANSLATE("Greatly increases quality of textures generated using render to texture effects.\nRaising the internal resolution will improve the effect of this setting.\nSlightly decreases performance and possibly causes issues (although unlikely).\n\nIf unsure, leave this checked."); wxString pixel_lighting_desc = wxTRANSLATE("Calculate lighting of 3D graphics per-pixel rather than per vertex.\nDecreases emulation speed by some percent (depending on your GPU).\nThis usually is a safe enhancement, but might cause issues sometimes.\n\nIf unsure, leave this unchecked."); wxString hacked_buffer_upload_desc = wxTRANSLATE("Use a hacked upload strategy to stream vertices.\nThis usually speed up, but is forbidden by OpenGL specification and may causes heavy glitches.\n\nIf unsure, leave this unchecked."); +wxString fast_depth_calc_desc = wxTRANSLATE("Use a less accurate algorithm to calculate depth values.\nCauses issues in a few games but might give a decent speedup.\n\nIf unsure, leave this checked."); wxString force_filtering_desc = wxTRANSLATE("Force texture filtering even if the emulated game explicitly disabled it.\nImproves texture quality slightly but causes glitches in some games.\n\nIf unsure, leave this unchecked."); wxString _3d_vision_desc = wxTRANSLATE("Enable 3D effects via stereoscopy using Nvidia 3D Vision technology if it's supported by your GPU.\nPossibly causes issues.\nRequires fullscreen to work.\n\nIf unsure, leave this unchecked."); wxString internal_res_desc = wxTRANSLATE("Specifies the resolution used to render at. A high resolution will improve visual quality a lot but is also quite heavy on performance and might cause glitches in certain games.\n\"Multiple of 640x528\" is a bit slower than \"Window Size\" but yields less issues. Generally speaking, the lower the internal resolution is, the better your performance will be.\n\nIf unsure, select 640x528."); @@ -502,7 +503,7 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string &title, con szr_other->Add(CreateCheckBox(page_hacks, _("OpenCL Texture Decoder"), wxGetTranslation(opencl_desc), vconfig.bEnableOpenCL)); szr_other->Add(CreateCheckBox(page_hacks, _("OpenMP Texture Decoder"), wxGetTranslation(omp_desc), vconfig.bOMPDecoder)); szr_other->Add(hacked_buffer_upload_cb = CreateCheckBox(page_hacks, _("Hacked Buffer Upload"), wxGetTranslation(hacked_buffer_upload_desc), vconfig.bHackedBufferUpload)); - + szr_other->Add(CreateCheckBox(page_hacks, _("Fast Depth Calculation"), wxGetTranslation(fast_depth_calc_desc), vconfig.bFastDepthCalc)); if (Core::GetState() != Core::CORE_UNINITIALIZED) hacked_buffer_upload_cb->Disable(); diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index c1e57815f4..e6a0512d7f 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -95,22 +95,23 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 compo uid->values[0] |= bpmem.genMode.numtevstages; // 4 uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 uid->values[0] |= dstAlphaMode << 8; // 2 + uid->values[0] |= g_ActiveConfig.bFastDepthCalc << 10; // 1 bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; - uid->values[0] |= enablePL << 10; // 1 + uid->values[0] |= enablePL << 11; // 1 if (!enablePL) { - uid->values[0] |= xfregs.numTexGen.numTexGens << 11; // 4 + uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4 } AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult(); - uid->values[0] |= alphaPreTest << 15; // 2 + uid->values[0] |= alphaPreTest << 16; // 2 // numtexgens should be <= 8 for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i) { - uid->values[0] |= xfregs.texMtxInfo[i].projection << (17+i); // 1 + uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1 } uid->values[1] = bpmem.genMode.numindstages; // 3 @@ -180,8 +181,9 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u *ptr++ = bpmem.ztex2.hex; // 2 *ptr++ = bpmem.zcontrol.hex; // 3 *ptr++ = bpmem.zmode.hex; // 4 - *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 5 - *ptr++ = xfregs.numTexGen.hex; // 6 + *ptr++ = g_ActiveConfig.bFastDepthCalc; // 5 + *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6 + *ptr++ = xfregs.numTexGen.hex; // 7 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { @@ -193,28 +195,28 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u } for (unsigned int i = 0; i < 8; ++i) - *ptr++ = xfregs.texMtxInfo[i].hex; // 7-14 + *ptr++ = xfregs.texMtxInfo[i].hex; // 8-15 for (unsigned int i = 0; i < 16; ++i) - *ptr++ = bpmem.tevind[i].hex; // 15-30 + *ptr++ = bpmem.tevind[i].hex; // 16-31 - *ptr++ = bpmem.tevindref.hex; // 31 + *ptr++ = bpmem.tevindref.hex; // 32 for (u32 i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times { - *ptr++ = bpmem.combiners[i].colorC.hex; // 32+5*i - *ptr++ = bpmem.combiners[i].alphaC.hex; // 33+5*i - *ptr++ = bpmem.tevind[i].hex; // 34+5*i - *ptr++ = bpmem.tevksel[i/2].hex; // 35+5*i - *ptr++ = bpmem.tevorders[i/2].hex; // 36+5*i + *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i + *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i + *ptr++ = bpmem.tevind[i].hex; // 35+5*i + *ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i + *ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i } - ptr = &uid->values[112]; + ptr = &uid->values[113]; - *ptr++ = bpmem.alpha_test.hex; // 112 + *ptr++ = bpmem.alpha_test.hex; // 113 - *ptr++ = bpmem.fog.c_proj_fsel.hex; // 113 - *ptr++ = bpmem.fogRange.Base.hex; // 114 + *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114 + *ptr++ = bpmem.fogRange.Base.hex; // 115 _assert_((ptr - uid->values) == uid->GetNumValues()); } @@ -518,7 +520,7 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType int numStages = bpmem.genMode.numtevstages + 1; int numTexgen = bpmem.genMode.numtexgens; - bool per_pixel_depth = bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable; + bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc; bool bOpenGL = ApiType == API_OPENGL; char *p = text; WRITE(p, "//Pixel Shader for TEV stages\n"); @@ -820,15 +822,22 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WriteAlphaTest(p, ApiType, dstAlphaMode, per_pixel_depth); - // the screen space depth value = far z + (clip z / clip w) * z range - if(ApiType == API_OPENGL || ApiType == API_D3D11) + // dx9 doesn't support readback of depth in pixel shader, so we always have to calculate it again + // shouldn't be a performance issue as the written depth is usually still from perspective division + // but this isn't true for z-textures, so there will be depth issues between enabled and disabled z-textures fragments + if((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc) WRITE(p, "float zCoord = rawpos.z;\n"); else - // dx9 doesn't support 4 component position, so we have to calculate it again + // the screen space depth value = far z + (clip z / clip w) * z range WRITE(p, "float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; + + // Note: z-textures are not written to depth buffer if early depth test is used + if (per_pixel_depth && bpmem.zcontrol.early_ztest) + WRITE(p, "depth = zCoord;\n"); + if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... @@ -839,11 +848,10 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType WRITE(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n"); WRITE(p, "zCoord = %s(zCoord);\n", GLSLConvertFunctions[FUNC_FRAC + bOpenGL]); WRITE(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n"); - - // Note: depth texture out put is only written to depth buffer if late depth test is used - if (per_pixel_depth) - WRITE(p, "depth = zCoord;\n"); } + + if (per_pixel_depth && !bpmem.zcontrol.early_ztest) + WRITE(p, "depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index b93659c01c..d2a3046939 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -32,7 +32,7 @@ #define C_PMATERIALS (C_PLIGHTS + 40) #define C_PENVCONST_END (C_PMATERIALS + 4) #define PIXELSHADERUID_MAX_VALUES 70 -#define PIXELSHADERUID_MAX_VALUES_SAFE 115 +#define PIXELSHADERUID_MAX_VALUES_SAFE 116 // Annoying sure, can be removed once we get up to GLSL ~1.3 const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index 0b70b5440f..6e1bc31e55 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -64,6 +64,7 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle, 0); iniFile.Get("Settings", "EnablePixelLighting", &bEnablePixelLighting, 0); iniFile.Get("Settings", "HackedBufferUpload", &bHackedBufferUpload, 0); + iniFile.Get("Settings", "FastDepthCalc", &bFastDepthCalc, true); iniFile.Get("Settings", "MSAA", &iMultisampleMode, 0); iniFile.Get("Settings", "EFBScale", &iEFBScale, (int) SCALE_1X); // native @@ -123,6 +124,7 @@ void VideoConfig::GameIniLoad(const char *ini_file) iniFile.GetIfExists("Video_Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle); iniFile.GetIfExists("Video_Settings", "EnablePixelLighting", &bEnablePixelLighting); iniFile.GetIfExists("Video_Settings", "HackedBufferUpload", &bHackedBufferUpload); + iniFile.GetIfExists("Video_Settings", "FastDepthCalc", &bFastDepthCalc); iniFile.GetIfExists("Video_Settings", "MSAA", &iMultisampleMode); int tmp = -9000; iniFile.GetIfExists("Video_Settings", "EFBScale", &tmp); // integral @@ -219,6 +221,7 @@ void VideoConfig::Save(const char *ini_file) iniFile.Set("Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle); iniFile.Set("Settings", "EnablePixelLighting", bEnablePixelLighting); iniFile.Set("Settings", "HackedBufferUpload", bHackedBufferUpload); + iniFile.Set("Settings", "FastDepthCalc", bFastDepthCalc); iniFile.Set("Settings", "ShowEFBCopyRegions", bShowEFBCopyRegions); iniFile.Set("Settings", "MSAA", iMultisampleMode); @@ -283,6 +286,7 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini) SET_IF_DIFFERS("Video_Settings", "AnaglyphStereoSeparation", iAnaglyphStereoSeparation); SET_IF_DIFFERS("Video_Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle); SET_IF_DIFFERS("Video_Settings", "EnablePixelLighting", bEnablePixelLighting); + SET_IF_DIFFERS("Video_Settings", "FastDepthCalc", bFastDepthCalc); SET_IF_DIFFERS("Video_Settings", "MSAA", iMultisampleMode); SET_IF_DIFFERS("Video_Settings", "EFBScale", iEFBScale); // integral SET_IF_DIFFERS("Video_Settings", "DstAlphaPass", bDstAlphaPass); diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 9b5076df8c..ec4ad3ed3d 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -123,6 +123,7 @@ struct VideoConfig bool bUseBBox; bool bEnablePixelLighting; bool bHackedBufferUpload; + bool bFastDepthCalc; int iLog; // CONF_ bits int iSaveTargetId; // TODO: Should be dropped