diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index bd57f0c888..7c76628dd4 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -272,10 +272,10 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 	out.Write("};\n");
 
 	const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest()
-								&& (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
-								// We can't allow early_ztest for zfreeze because a reference poly is used
-								// to control the depth and we need a depth test after the alpha test.
-								&& !bpmem.genMode.zfreeze;
+	                            && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
+	                            // We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
+	                            // This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
+	                            && !bpmem.genMode.zfreeze;
 	const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
 
 	if (forced_early_z)
@@ -1028,9 +1028,9 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_T
 	// important that a reliable alpha test, so we just force the alpha test to always succeed.
 	// At least this seems to be less buggy.
 	uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest()
-											 && bpmem.zmode.updateenable
-											 && !g_ActiveConfig.backend_info.bSupportsEarlyZ
-											 && !bpmem.genMode.zfreeze; // Might not be neccessary
+	                                         && bpmem.zmode.updateenable
+	                                         && !g_ActiveConfig.backend_info.bSupportsEarlyZ
+	                                         && !bpmem.genMode.zfreeze;
 
 	if (!uid_data->alpha_test_use_zcomploc_hack)
 	{
@@ -1123,7 +1123,7 @@ static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, A
 
 		// Opengl has reversed vertical screenspace coordiantes
 		if (ApiType == API_OPENGL)
-			out.Write("\tscreenpos.y = %i - screenpos.y - 1;\n", EFB_HEIGHT);
+			out.Write("\tscreenpos.y = %i - screenpos.y;\n", EFB_HEIGHT);
 
 		out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xFFFFFF);\n");
 	}
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h
index b37fa59ab3..6d063ca419 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/PixelShaderGen.h
@@ -64,7 +64,10 @@ struct pixel_shader_uid_data
 	u32 forced_early_z : 1;
 	u32 early_ztest : 1;
 	u32 bounding_box : 1;
+
+	// TODO: 31 bits of padding is a waste. Can we free up some bits elseware?
 	u32 zfreeze : 1;
+	u32 pad : 31;
 
 	u32 texMtxInfo_n_projection : 8; // 8x1 bit
 	u32 tevindref_bi0 : 3;
diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp
index 3f6c672c65..4c13a26736 100644
--- a/Source/Core/VideoCommon/VertexManagerBase.cpp
+++ b/Source/Core/VideoCommon/VertexManagerBase.cpp
@@ -250,7 +250,7 @@ void VertexManager::CalculateZSlope(u32 stride)
 	float vtx[9];
 	float out[12];
 	float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
-						    xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};
+	                        xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};
 
 	// Lookup vertices of the last rendered triangle and software-transform them
 	// This allows us to determine the depth slope, which will be used if zfreeze