diff --git a/Source/Core/VideoBackends/D3D/VertexManager.cpp b/Source/Core/VideoBackends/D3D/VertexManager.cpp
index 5bd39d45b2..8f925452c2 100644
--- a/Source/Core/VideoBackends/D3D/VertexManager.cpp
+++ b/Source/Core/VideoBackends/D3D/VertexManager.cpp
@@ -178,9 +178,70 @@ void VertexManager::vFlush(bool useDstAlpha)
 	}
 
 	u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride();
+	u32 indices = IndexGenerator::GetIndexLen();
 
 	PrepareDrawBuffers(stride);
 
+	if (!bpmem.genMode.zfreeze && indices >= 3)
+	{
+		float vtx[9];
+		float out[12];
+		bool degenerate = false;
+
+		// Lookup vertices of the last rendered triangle and software-transform them
+		// This allows us to determine the depth slope, which will be used if zfreeze
+		// is enabled in the following flush.
+		for (unsigned int i = 0; i < 3; ++i)
+		{
+			const int base_index = GetIndexBuffer()[indices - 3 + i];
+			u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride];
+			vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
+			vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
+			vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
+
+			VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
+
+			// A vertex with w == 0 cannot be perspective-divided; doing so would
+			// poison the slope with inf/NaN, so treat the triangle as degenerate.
+			if (out[3 + i * 4] == 0.0f)
+			{
+				degenerate = true;
+				break;
+			}
+
+			// viewport offset ignored because we only look at coordinate differences.
+			out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
+			out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
+			out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
+		}
+
+		if (!degenerate)
+		{
+			float dx31 = out[8] - out[0];
+			float dx12 = out[0] - out[4];
+			float dy12 = out[1] - out[5];
+			float dy31 = out[9] - out[1];
+
+			float DF31 = out[10] - out[2];
+			float DF21 = out[6] - out[2];
+			float a = DF31 * -dy12 - DF21 * dy31;
+			float b = dx31 * DF21 + dx12 * DF31;
+			float c = -dx12 * dy31 - dx31 * -dy12;
+
+			// c is the 2D cross product of two triangle edges. It is zero for a
+			// zero-area triangle, which defines no depth plane: keep the previously
+			// stored slope instead of dividing by zero.
+			if (c != 0.0f)
+			{
+				float slope_dfdx = -a / c;
+				float slope_dfdy = -b / c;
+				float slope_f0 = out[2];
+
+				PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0);
+			}
+		}
+	}
+
 	VertexLoaderManager::GetCurrentVertexFormat()->SetupVertexPointers();
 	g_renderer->ApplyState(useDstAlpha);
 
diff --git a/Source/Core/VideoBackends/D3D/VertexManager.h b/Source/Core/VideoBackends/D3D/VertexManager.h
index 0b124d7512..38fcd088fd 100644
--- a/Source/Core/VideoBackends/D3D/VertexManager.h
+++ b/Source/Core/VideoBackends/D3D/VertexManager.h
@@ -22,6 +22,7 @@ public:
 protected:
 	virtual void ResetBuffer(u32 stride) override;
 	u16* GetIndexBuffer() { return &LocalIBuffer[0]; }
+	u8* GetVertexBuffer() { return &LocalVBuffer[0]; }
 
 private:
 
diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp
index 1a162b1cde..d3a8d91bca 100644
--- a/Source/Core/VideoBackends/OGL/VertexManager.cpp
+++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp
@@ -42,6 +42,13 @@ static size_t s_index_offset;
 
 VertexManager::VertexManager()
 {
+	LocalVBuffer.resize(MAXVBUFFERSIZE);
+
+	s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0];
+	s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
+
+	LocalIBuffer.resize(MAXIBUFFERSIZE);
+
 	CreateDeviceObjects();
 }
 
@@ -131,6 +138,7 @@ void VertexManager::vFlush(bool useDstAlpha)
 {
 	GLVertexFormat *nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
 	u32 stride = nativeVertexFmt->GetVertexStride();
+	u32 indices = IndexGenerator::GetIndexLen();
 
 	if (m_last_vao != nativeVertexFmt->VAO)
 	{
@@ -140,6 +148,66 @@ void VertexManager::vFlush(bool useDstAlpha)
 
 	PrepareDrawBuffers(stride);
 
+	if (!bpmem.genMode.zfreeze && indices >= 3)
+	{
+		float vtx[9];
+		float out[12];
+		bool degenerate = false;
+
+		// Lookup vertices of the last rendered triangle and software-transform them
+		// This allows us to determine the depth slope, which will be used if zfreeze
+		// is enabled in the following flush.
+		for (unsigned int i = 0; i < 3; ++i)
+		{
+			const int base_index = GetIndexBuffer()[indices - 3 + i];
+			u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride];
+			vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
+			vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
+			vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
+
+			VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
+
+			// A vertex with w == 0 cannot be perspective-divided; doing so would
+			// poison the slope with inf/NaN, so treat the triangle as degenerate.
+			if (out[3 + i * 4] == 0.0f)
+			{
+				degenerate = true;
+				break;
+			}
+
+			// viewport offset ignored because we only look at coordinate differences.
+			out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
+			out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
+			out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
+		}
+
+		if (!degenerate)
+		{
+			float dx31 = out[8] - out[0];
+			float dx12 = out[0] - out[4];
+			float dy12 = out[1] - out[5];
+			float dy31 = out[9] - out[1];
+
+			float DF31 = out[10] - out[2];
+			float DF21 = out[6] - out[2];
+			float a = DF31 * -dy12 - DF21 * dy31;
+			float b = dx31 * DF21 + dx12 * DF31;
+			float c = -dx12 * dy31 - dx31 * -dy12;
+
+			// c is the 2D cross product of two triangle edges. It is zero for a
+			// zero-area triangle, which defines no depth plane: keep the previously
+			// stored slope instead of dividing by zero.
+			if (c != 0.0f)
+			{
+				float slope_dfdx = -a / c;
+				float slope_dfdy = -b / c;
+				float slope_f0 = out[2];
+
+				PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0);
+			}
+		}
+	}
+
 	// Makes sure we can actually do Dual source blending
 	bool dualSourcePossible = g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
 
diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h
index 1f527fd9c0..0e9efd9c83 100644
--- a/Source/Core/VideoBackends/OGL/VertexManager.h
+++ b/Source/Core/VideoBackends/OGL/VertexManager.h
@@ -42,10 +42,15 @@ public:
 	GLuint m_last_vao;
 protected:
 	virtual void ResetBuffer(u32 stride) override;
+	u16* GetIndexBuffer() { return &LocalIBuffer[0]; }
+	u8* GetVertexBuffer() { return &LocalVBuffer[0]; }
 private:
 	void Draw(u32 stride);
 	void vFlush(bool useDstAlpha) override;
 	void PrepareDrawBuffers(u32 stride);
+
+	std::vector<u8> LocalVBuffer;
+	std::vector<u16> LocalIBuffer;
 };
 
 }
diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h
index 9bfce8aac1..b7b3d6664c 100644
--- a/Source/Core/VideoCommon/ConstantManager.h
+++ b/Source/Core/VideoCommon/ConstantManager.h
@@ -23,6 +23,7 @@ struct PixelShaderConstants
 	int4 fogcolor;
 	int4 fogi;
 	float4 fogf[2];
+	float4 zslope;
 };
 
 struct VertexShaderConstants
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index edc67cc83c..7afb21056c 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -228,6 +228,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 		"\tint4 " I_FOGCOLOR";\n"
 		"\tint4 " I_FOGI";\n"
 		"\tfloat4 " I_FOGF"[2];\n"
+		"\tfloat4 " I_ZSLOPE";\n"
 		"};\n");
 
 	if (g_ActiveConfig.bEnablePixelLighting)
@@ -269,7 +270,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 	out.Write("};\n");
 
 	const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
-	const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z);
+	const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
 
 	if (forced_early_z)
 	{
@@ -538,10 +539,20 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 	uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
 	uid_data->early_ztest = bpmem.UseEarlyDepthTest();
 	uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
+	uid_data->zfreeze = bpmem.genMode.zfreeze;
 
 	// Note: z-textures are not written to depth buffer if early depth test is used
 	if (per_pixel_depth && bpmem.UseEarlyDepthTest())
-		out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
+	{
+		if (bpmem.genMode.zfreeze)
+		{
+			out.Write("\tdepth = " I_ZSLOPE".z + " I_ZSLOPE".x * (clipPos.x / clipPos.w) + " I_ZSLOPE".y * (clipPos.y / clipPos.w);\n");
+		}
+		else
+		{
+			out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
+		}
+	}
 
 	// Note: depth texture output is only written to depth buffer if late depth test is used
 	// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
@@ -555,7 +566,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 	}
 
 	if (per_pixel_depth && bpmem.UseLateDepthTest())
-		out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
+	{
+		if (bpmem.genMode.zfreeze)
+		{
+			out.Write("\tdepth = " I_ZSLOPE".z + " I_ZSLOPE".x * (clipPos.x / clipPos.w) + " I_ZSLOPE".y * (clipPos.y / clipPos.w);\n");
+		}
+		else
+		{
+			out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
+		}
+	}
 
 	if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
 	{
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h
index 784523087a..c889bd62a0 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/PixelShaderGen.h
@@ -21,8 +21,9 @@
 #define C_FOGCOLOR (C_INDTEXMTX + 6) //27
 #define C_FOGI (C_FOGCOLOR + 1) //28
 #define C_FOGF (C_FOGI + 1) //29
+#define C_ZSLOPE (C_FOGF + 2) //31 (fogf is a float4[2], so it spans 29-30)
 
-#define C_PENVCONST_END (C_FOGF + 2)
+#define C_PENVCONST_END (C_ZSLOPE + 1)
 
 // Different ways to achieve rendering with destination alpha
 enum DSTALPHA_MODE
@@ -62,6 +63,7 @@ struct pixel_shader_uid_data
 	u32 forced_early_z : 1;
 	u32 early_ztest : 1;
 	u32 bounding_box : 1;
+	u32 zfreeze : 1;
 
 	u32 texMtxInfo_n_projection : 8; // 8x1 bit
 	u32 tevindref_bi0 : 3;
diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp
index 0c6d4b73b3..b55147eb15 100644
--- a/Source/Core/VideoCommon/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/PixelShaderManager.cpp
@@ -14,6 +14,8 @@
 
 bool PixelShaderManager::s_bFogRangeAdjustChanged;
 bool PixelShaderManager::s_bViewPortChanged;
+bool PixelShaderManager::s_bZSlopeChanged;
+static float zslope[3];
 
 std::array PixelShaderManager::s_tev_color;
 std::array PixelShaderManager::s_tev_konst_color;
@@ -48,6 +50,7 @@ void PixelShaderManager::Dirty()
 	SetDestAlpha();
 	SetZTextureBias();
 	SetViewportChanged();
+	SetZSlopeChanged(0, 0, 1);
 	SetIndTexScaleChanged(false);
 	SetIndTexScaleChanged(true);
 	SetIndMatrixChanged(0);
@@ -112,6 +115,17 @@ void PixelShaderManager::SetConstants()
 		dirty = true;
 		s_bViewPortChanged = false;
 	}
+
+	if (s_bZSlopeChanged)
+	{
+		constants.zslope[0] = zslope[0];
+		constants.zslope[1] = zslope[1];
+		constants.zslope[2] = zslope[2];
+		constants.zslope[3] = 0;
+
+		dirty = true;
+		s_bZSlopeChanged = false;
+	}
 }
 
 void PixelShaderManager::SetTevColor(int index, int component, s32 value)
@@ -168,6 +182,16 @@ void PixelShaderManager::SetViewportChanged()
 	s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
 }
 
+// Stores the depth slope (dfdx, dfdy) and reference depth f0, and flags them so
+// that the next SetConstants() call copies them into constants.zslope.
+void PixelShaderManager::SetZSlopeChanged(float dfdx, float dfdy, float f0)
+{
+	zslope[0] = dfdx;
+	zslope[1] = dfdy;
+	zslope[2] = f0;
+	s_bZSlopeChanged = true;
+}
+
 void PixelShaderManager::SetIndTexScaleChanged(bool high)
 {
 	constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
diff --git a/Source/Core/VideoCommon/PixelShaderManager.h b/Source/Core/VideoCommon/PixelShaderManager.h
index ebf299d9fc..16c760f70f 100644
--- a/Source/Core/VideoCommon/PixelShaderManager.h
+++ b/Source/Core/VideoCommon/PixelShaderManager.h
@@ -36,6 +36,7 @@ public:
 	static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
 	static void SetZTextureBias();
 	static void SetViewportChanged();
+	static void SetZSlopeChanged(float dfdx, float dfdy, float f0);
 	static void SetIndMatrixChanged(int matrixidx);
 	static void SetTevKSelChanged(int id);
 	static void SetZTextureTypeChanged();
@@ -50,6 +51,7 @@ public:
 
 	static bool s_bFogRangeAdjustChanged;
 	static bool s_bViewPortChanged;
+	static bool s_bZSlopeChanged;
 
 	// These colors aren't available from global BP state,
 	// hence we keep a copy of them around.
diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h
index 571f8db5c5..dd80fd3987 100644
--- a/Source/Core/VideoCommon/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/ShaderGenCommon.h
@@ -291,6 +291,7 @@ static inline void AssignVSOutputMembers(T& object, const char* a, const char* b
 #define I_FOGCOLOR "cfogcolor"
 #define I_FOGI "cfogi"
 #define I_FOGF "cfogf"
+#define I_ZSLOPE "czslope"
 
 #define I_POSNORMALMATRIX "cpnmtx"
 #define I_PROJECTION "cproj"
diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp
index 4ca20a21f4..a745f7004f 100644
--- a/Source/Core/VideoCommon/VertexShaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexShaderManager.cpp
@@ -690,6 +690,30 @@ void VertexShaderManager::ResetView()
 	bProjectionChanged = true;
 }
 
+void VertexShaderManager::TransformToClipSpace(const float* data, float *out)
+{
+	// Position matrix selected by PosNormalMtxIdx (read as 3 rows of 4 floats) and the projection matrix (4 rows of 4 floats).
+	const float* const pos_rows = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
+	const float* const proj_rows = &g_fProjectionMatrix[0];
+
+	// Apply the position matrix to (x, y, z, 1).
+	float view[3];
+	for (int row = 0; row < 3; ++row)
+	{
+		const float* m = pos_rows + row * 4;
+		view[row] = data[0] * m[0] + data[1] * m[1] + data[2] * m[2] + m[3];
+	}
+
+	// TODO: this requires g_fProjectionMatrix to be up to date, which is not really a good design decision.
+
+	// Apply the projection matrix to (view, 1), producing x, y, z and w.
+	for (int row = 0; row < 4; ++row)
+	{
+		const float* p = proj_rows + row * 4;
+		out[row] = view[0] * p[0] + view[1] * p[1] + view[2] * p[2] + p[3];
+	}
+}
+
 void VertexShaderManager::DoState(PointerWrap &p)
 {
 	p.Do(g_fProjectionMatrix);
diff --git a/Source/Core/VideoCommon/VertexShaderManager.h b/Source/Core/VideoCommon/VertexShaderManager.h
index d99f07fe21..229ba1f599 100644
--- a/Source/Core/VideoCommon/VertexShaderManager.h
+++ b/Source/Core/VideoCommon/VertexShaderManager.h
@@ -34,6 +34,12 @@ public:
 	static void RotateView(float x, float y);
 	static void ResetView();
 
+	// Software-transforms one vertex position with the current position and projection matrices.
+	// data: input, 3 floats (X, Y, Z model coordinates); out: output, 4 floats (clip space X, Y, Z, W).
+	// NOTE: reads g_fProjectionMatrix, which must be up to date when this is called
+	//       (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
+	static void TransformToClipSpace(const float* data, float *out);
+
 	static VertexShaderConstants constants;
 	static bool dirty;
 };