diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index a63fafc2a5..2b3ce95e08 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -64,7 +64,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 38; +static const u32 STATE_VERSION = 39; enum { diff --git a/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp index a0762578e1..abd9c569b2 100644 --- a/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp @@ -59,7 +59,7 @@ DXGI_FORMAT VarToD3D(VarType t, int size, bool integer) void D3DVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl) { - vertex_stride = _vtx_decl.stride; + vtx_decl = _vtx_decl; memset(m_elems, 0, sizeof(m_elems)); const AttributeFormat* format = &_vtx_decl.position; diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 268a408022..ea6bb924b6 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -33,6 +33,7 @@ #include "VideoCommon/ImageWrite.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" @@ -231,6 +232,7 @@ Renderer::Renderer(void *&window_handle) s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); + PixelShaderManager::SetEfbScaleChanged(); SetupDeviceObjects(); @@ -946,6 +948,8 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; 
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); + PixelShaderManager::SetEfbScaleChanged(); + D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr); delete g_framebuffer_manager; diff --git a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp index 4207a5a4af..8b1e4ded24 100644 --- a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp @@ -58,7 +58,7 @@ static void SetPointer(u32 attrib, u32 stride, const AttributeFormat &format) void GLVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl) { this->vtx_decl = _vtx_decl; - vertex_stride = vtx_decl.stride; + u32 vertex_stride = _vtx_decl.stride; // We will not allow vertex components causing uneven strides. if (vertex_stride & 3) diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 961d50f9c7..3d1991fe3f 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -43,6 +43,7 @@ #include "VideoCommon/ImageWrite.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderGen.h" @@ -618,6 +619,8 @@ Renderer::Renderer() s_last_efb_scale = g_ActiveConfig.iEFBScale; CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); + PixelShaderManager::SetEfbScaleChanged(); + // Because of the fixed framebuffer size we need to disable the resolution // options while running g_Config.bRunning = true; @@ -1681,6 +1684,8 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co delete g_framebuffer_manager; g_framebuffer_manager = new FramebufferManager(s_target_width, s_target_height, s_MSAASamples); + + PixelShaderManager::SetEfbScaleChanged(); } } diff --git 
a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp
index 1a162b1cde..5b89e0507e 100644
--- a/Source/Core/VideoBackends/OGL/VertexManager.cpp
+++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp
@@ -43,6 +43,10 @@ static size_t s_index_offset;
 VertexManager::VertexManager()
 {
 	CreateDeviceObjects();
+	// CPU-side buffers that ResetBuffer() hands out while CullAll is set.
+	// NOTE(review): sized with MAX_VBUFFER_SIZE / MAX_IBUFFER_SIZE while ResetBuffer() maps MAXVBUFFERSIZE / MAXIBUFFERSIZE * sizeof(u16) - confirm they agree.
+	CpuVBuffer.resize(MAX_VBUFFER_SIZE);
+	CpuIBuffer.resize(MAX_IBUFFER_SIZE);
 }
 
 VertexManager::~VertexManager()
@@ -81,14 +85,27 @@ void VertexManager::PrepareDrawBuffers(u32 stride)
 
 void VertexManager::ResetBuffer(u32 stride)
 {
-	auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
-	s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first;
-	s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE;
-	s_baseVertex = buffer.second / stride;
+	if (CullAll)
+	{
+		// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
+		// s_baseVertex and s_index_offset are not updated on this path.
+		s_pCurBufferPointer = s_pBaseBufferPointer = CpuVBuffer.data();
+		s_pEndBufferPointer = s_pBaseBufferPointer + CpuVBuffer.size();
 
-	buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
-	IndexGenerator::Start((u16*)buffer.first);
-	s_index_offset = buffer.second;
+		IndexGenerator::Start((u16*)CpuIBuffer.data());
+	}
+	else
+	{
+		// Normal path: write vertices and indices straight into the mapped buffers.
+		auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
+		s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first;
+		s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE;
+		s_baseVertex = buffer.second / stride;
+
+		buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
+		IndexGenerator::Start((u16*)buffer.first);
+		s_index_offset = buffer.second;
+	}
 }
 
 void VertexManager::Draw(u32 stride)
diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h
index 1f527fd9c0..ba6e49c466 100644
--- a/Source/Core/VideoBackends/OGL/VertexManager.h
+++ b/Source/Core/VideoBackends/OGL/VertexManager.h
@@ -13,8 +13,6 @@ namespace OGL
 {
 class GLVertexFormat : public NativeVertexFormat
 {
-	PortableVertexDeclaration
vtx_decl;
-
 public:
 	GLVertexFormat();
 	~GLVertexFormat();
@@ -42,10 +40,15 @@ public:
 	GLuint m_last_vao;
 protected:
 	virtual void ResetBuffer(u32 stride) override;
+
 private:
 	void Draw(u32 stride);
 	void vFlush(bool useDstAlpha) override;
 	void PrepareDrawBuffers(u32 stride);
+
+	// Alternative buffers in CPU memory for primitives we are going to discard.
+	std::vector<u8> CpuVBuffer;
+	std::vector<u16> CpuIBuffer;
 };
 
 }
diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h
index 9bfce8aac1..8fc1c221ed 100644
--- a/Source/Core/VideoCommon/ConstantManager.h
+++ b/Source/Core/VideoCommon/ConstantManager.h
@@ -23,6 +23,8 @@ struct PixelShaderConstants
 	int4 fogcolor;
 	int4 fogi;
 	float4 fogf[2];
+	float4 zslope;
+	float4 efbscale;
 };
 
 struct VertexShaderConstants
diff --git a/Source/Core/VideoCommon/GeometryShaderManager.cpp b/Source/Core/VideoCommon/GeometryShaderManager.cpp
index 3e96592625..0a07cf683c 100644
--- a/Source/Core/VideoCommon/GeometryShaderManager.cpp
+++ b/Source/Core/VideoCommon/GeometryShaderManager.cpp
@@ -26,7 +26,11 @@ void GeometryShaderManager::Init()
 {
 	memset(&constants, 0, sizeof(constants));
 
-	Dirty();
+	// Init any initial constants which aren't zero when bpmem is zero.
+	SetViewportChanged();
+	SetProjectionChanged();
+
+	dirty = true;
 }
 
 void GeometryShaderManager::Shutdown()
@@ -35,12 +39,9 @@ void GeometryShaderManager::Shutdown()
 
 void GeometryShaderManager::Dirty()
 {
-	SetViewportChanged();
-	SetProjectionChanged();
-	SetLinePtWidthChanged();
-
-	for (int i = 0; i < 8; i++)
-		SetTexCoordChanged(i);
+	// This function is called after a savestate is loaded (see DoState below).
+	// Any constants that can change based on settings should be re-calculated.
+	s_projection_changed = true;
 
 	dirty = true;
 }
@@ -110,9 +111,14 @@ void GeometryShaderManager::SetTexCoordChanged(u8 texmapid)
 
 void GeometryShaderManager::DoState(PointerWrap &p)
 {
+	p.Do(s_projection_changed);
+	p.Do(s_viewport_changed);
+
+	p.Do(constants);
+
 	if (p.GetMode() == PointerWrap::MODE_READ)
 	{
-		// Reload current state from global GPU state
+		// Fixup the current state from global GPU state
 		// NOTE: This requires that all GPU memory has been loaded already.
 		Dirty();
 	}
diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h
index 024f4f070d..5a77c3eb03 100644
--- a/Source/Core/VideoCommon/NativeVertexFormat.h
+++ b/Source/Core/VideoCommon/NativeVertexFormat.h
@@ -109,7 +109,8 @@ public:
 	virtual void Initialize(const PortableVertexDeclaration &vtx_decl) = 0;
 	virtual void SetupVertexPointers() = 0;
 
-	u32 GetVertexStride() const { return vertex_stride; }
+	u32 GetVertexStride() const { return vtx_decl.stride; }
+	const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
 
 	// TODO: move this under private:
 	u32 m_components; // VB_HAS_X. Bitmask telling what vertex components are present.
@@ -118,5 +119,5 @@ protected:
 	// Let subclasses construct.
NativeVertexFormat() {} - u32 vertex_stride; + PortableVertexDeclaration vtx_decl; }; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index edc67cc83c..7c76628dd4 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -144,6 +144,7 @@ template static inline void WriteTevRegular(T& out, const char* compone template static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); template static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); template static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data); +template static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType); template static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) @@ -228,6 +229,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T "\tint4 " I_FOGCOLOR";\n" "\tint4 " I_FOGI";\n" "\tfloat4 " I_FOGF"[2];\n" + "\tfloat4 " I_ZSLOPE";\n" + "\tfloat4 " I_EFBSCALE";\n" "};\n"); if (g_ActiveConfig.bEnablePixelLighting) @@ -268,8 +271,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T GenerateVSOutputMembers(out, ApiType); out.Write("};\n"); - const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED); - const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z); + const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() + && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == 
AlphaTest::UNDETERMINED) + // We can't allow early_ztest for zfreeze because depth is overridden per-pixel. + // This means it's impossible for zcomploc to be emulated on a zfrozen polygon. + && !bpmem.genMode.zfreeze; + const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze; if (forced_early_z) { @@ -362,7 +369,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("void main(\n"); out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", - per_pixel_depth ? "\n out float depth : SV_Depth," : ""); + (per_pixel_depth && bpmem.zmode.testenable) ? "\n out float depth : SV_Depth," : ""); out.Write(" in centroid float4 colors_0 : COLOR0,\n"); out.Write(" in centroid float4 colors_1 : COLOR1\n"); @@ -538,10 +545,13 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc; uid_data->early_ztest = bpmem.UseEarlyDepthTest(); uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel; + uid_data->zfreeze = bpmem.genMode.zfreeze; // Note: z-textures are not written to depth buffer if early depth test is used if (per_pixel_depth && bpmem.UseEarlyDepthTest()) - out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n"); + { + WritePerPixelDepth(out, uid_data, ApiType); + } // Note: depth texture output is only written to depth buffer if late depth test is used // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway @@ -555,7 +565,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } if (per_pixel_depth && bpmem.UseLateDepthTest()) - out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n"); + { + WritePerPixelDepth(out, 
uid_data, ApiType);
+	}
 
 	if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
 	{
@@ -1015,7 +1027,11 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_T
 	// Tests seem to have proven that writing depth even when the alpha test fails is more
 	// important that a reliable alpha test, so we just force the alpha test to always succeed.
 	// At least this seems to be less buggy.
-	uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ;
+	uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest()
+	                                         && bpmem.zmode.updateenable
+	                                         && !g_ActiveConfig.backend_info.bSupportsEarlyZ
+	                                         && !bpmem.genMode.zfreeze;
+
 	if (!uid_data->alpha_test_use_zcomploc_hack)
 	{
 		out.Write("\t\tdiscard;\n");
@@ -1095,6 +1111,33 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data)
 	out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
 }
 
+// Emits the shader statement that assigns the per-pixel "depth" output.
+// With zfreeze enabled, depth is evaluated from the I_ZSLOPE plane at the pixel's position
+// (rawpos.xy scaled by I_EFBSCALE); otherwise it is derived from zCoord.
+// NOTE: uid_data is currently not referenced by this function.
+template<class T>
+static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType)
+{
+	if (bpmem.genMode.zfreeze)
+	{
+		out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
+		out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);
+
+		out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE".xy;\n");
+
+		// OpenGL has reversed vertical screen-space coordinates
+		if (ApiType == API_OPENGL)
+			out.Write("\tscreenpos.y = %i - screenpos.y;\n", EFB_HEIGHT);
+
+		out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xFFFFFF);\n");
+	}
+	else
+	{
+		out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
+	}
+}
+
+
 void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
 {
 	GeneratePixelShader(object, dstAlphaMode, ApiType, components);
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h
index 784523087a..6d063ca419 100644
---
a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -21,8 +21,10 @@ #define C_FOGCOLOR (C_INDTEXMTX + 6) //27 #define C_FOGI (C_FOGCOLOR + 1) //28 #define C_FOGF (C_FOGI + 1) //29 +#define C_ZSLOPE (C_FOGF + 2) //31 +#define C_EFBSCALE (C_ZSLOPE + 1) //32 -#define C_PENVCONST_END (C_FOGF + 2) +#define C_PENVCONST_END (C_EFBSCALE + 1) // Different ways to achieve rendering with destination alpha enum DSTALPHA_MODE @@ -63,6 +65,10 @@ struct pixel_shader_uid_data u32 early_ztest : 1; u32 bounding_box : 1; + // TODO: 31 bits of padding is a waste. Can we free up some bits elseware? + u32 zfreeze : 1; + u32 pad : 31; + u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; u32 tevindref_bc0 : 3; diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 0c6d4b73b3..b1a68248ee 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -15,41 +15,18 @@ bool PixelShaderManager::s_bFogRangeAdjustChanged; bool PixelShaderManager::s_bViewPortChanged; -std::array PixelShaderManager::s_tev_color; -std::array PixelShaderManager::s_tev_konst_color; - PixelShaderConstants PixelShaderManager::constants; bool PixelShaderManager::dirty; void PixelShaderManager::Init() { memset(&constants, 0, sizeof(constants)); - memset(s_tev_color.data(), 0, sizeof(s_tev_color)); - memset(s_tev_konst_color.data(), 0, sizeof(s_tev_konst_color)); - Dirty(); -} - -void PixelShaderManager::Dirty() -{ + // Init any intial constants which aren't zero when bpmem is zero. 
s_bFogRangeAdjustChanged = true;
-	s_bViewPortChanged = true;
+	s_bViewPortChanged = false;
 
-	for (unsigned index = 0; index < s_tev_color.size(); ++index)
-	{
-		for (int comp = 0; comp < 4; ++comp)
-		{
-			SetTevColor(index, comp, s_tev_color[index][comp]);
-			SetTevKonstColor(index, comp, s_tev_konst_color[index][comp]);
-		}
-	}
-
-	SetAlpha();
-	SetDestAlpha();
-	SetZTextureBias();
-	SetViewportChanged();
-	SetIndTexScaleChanged(false);
-	SetIndTexScaleChanged(true);
+	SetEfbScaleChanged();
 	SetIndMatrixChanged(0);
 	SetIndMatrixChanged(1);
 	SetIndMatrixChanged(2);
@@ -62,8 +39,20 @@ void PixelShaderManager::Dirty()
 	SetTexCoordChanged(5);
 	SetTexCoordChanged(6);
 	SetTexCoordChanged(7);
-	SetFogColorChanged();
+
+	dirty = true;
+}
+
+void PixelShaderManager::Dirty()
+{
+	// This function is called after a savestate is loaded (see DoState below).
+	// Any constants that can change based on settings should be re-calculated.
+	s_bFogRangeAdjustChanged = true;
+
+	SetEfbScaleChanged();
 	SetFogParamChanged();
+
+	dirty = true;
 }
 
 void PixelShaderManager::Shutdown()
@@ -117,7 +106,7 @@ void PixelShaderManager::SetConstants()
 void PixelShaderManager::SetTevColor(int index, int component, s32 value)
 {
 	auto& c = constants.colors[index];
-	c[component] = s_tev_color[index][component] = value;
+	c[component] = value;
 	dirty = true;
 
 	PRIM_LOG("tev color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
@@ -126,7 +115,7 @@ void PixelShaderManager::SetTevColor(int index, int component, s32 value)
 void PixelShaderManager::SetTevKonstColor(int index, int component, s32 value)
 {
 	auto& c = constants.kcolors[index];
-	c[component] = s_tev_konst_color[index][component] = value;
+	c[component] = value;
 	dirty = true;
 
 	PRIM_LOG("tev konst color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
@@ -168,6 +157,25 @@ void PixelShaderManager::SetViewportChanged()
 	s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
 }
 
+// Stores the reciprocals of Renderer::EFBToScaledXf(1) / EFBToScaledYf(1) in efbscale.xy.
+// The renderers call this after CalculateTargetSize(), i.e. whenever the EFB scale may have changed.
+void PixelShaderManager::SetEfbScaleChanged()
+{
+	constants.efbscale[0] = 1.0f / float(Renderer::EFBToScaledXf(1));
+	constants.efbscale[1] = 1.0f / float(Renderer::EFBToScaledYf(1));
+	dirty = true;
+}
+
+// Uploads the zfreeze depth plane: zslope.xy hold the per-pixel depth gradients and zslope.z
+// the depth at the screen origin. zslope.w is not written here.
+void PixelShaderManager::SetZSlope(float dfdx, float dfdy, float f0)
+{
+	constants.zslope[0] = dfdx;
+	constants.zslope[1] = dfdy;
+	constants.zslope[2] = f0;
+	dirty = true;
+}
+
 void PixelShaderManager::SetIndTexScaleChanged(bool high)
 {
 	constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
@@ -278,12 +286,14 @@ void PixelShaderManager::SetFogRangeAdjustChanged()
 
 void PixelShaderManager::DoState(PointerWrap &p)
 {
-	p.DoArray(s_tev_color);
-	p.DoArray(s_tev_konst_color);
+	p.Do(s_bFogRangeAdjustChanged);
+	p.Do(s_bViewPortChanged);
+
+	p.Do(constants);
 
 	if (p.GetMode() == PointerWrap::MODE_READ)
 	{
-		// Reload current state from global GPU state
+		// Fixup the current state from global GPU state
 		// NOTE: This requires that all GPU memory has been loaded already.
 		Dirty();
 	}
diff --git a/Source/Core/VideoCommon/PixelShaderManager.h b/Source/Core/VideoCommon/PixelShaderManager.h
index ebf299d9fc..0f3dba3699 100644
--- a/Source/Core/VideoCommon/PixelShaderManager.h
+++ b/Source/Core/VideoCommon/PixelShaderManager.h
@@ -36,6 +36,8 @@ public:
 	static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
 	static void SetZTextureBias();
 	static void SetViewportChanged();
+	static void SetEfbScaleChanged();
+	static void SetZSlope(float dfdx, float dfdy, float f0);
 	static void SetIndMatrixChanged(int matrixidx);
 	static void SetTevKSelChanged(int id);
 	static void SetZTextureTypeChanged();
@@ -50,9 +52,4 @@ public:
 
 	static bool s_bFogRangeAdjustChanged;
 	static bool s_bViewPortChanged;
-
-	// These colors aren't available from global BP state,
-	// hence we keep a copy of them around.
- static std::array s_tev_color; - static std::array s_tev_konst_color; }; diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 571f8db5c5..4698392f9e 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -291,6 +291,8 @@ static inline void AssignVSOutputMembers(T& object, const char* a, const char* b #define I_FOGCOLOR "cfogcolor" #define I_FOGI "cfogi" #define I_FOGF "cfogf" +#define I_ZSLOPE "czslope" +#define I_EFBSCALE "cefbscale" #define I_POSNORMALMATRIX "cpnmtx" #define I_PROJECTION "cproj" diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 786cb45a96..e0115a0d29 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -149,19 +149,20 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo if ((int)src.size() < size) return -1; - if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5)) - { - // if cull mode is CULL_ALL, ignore triangles and quads + if (skip_drawing) return size; - } // If the native vertex format changed, force a flush. if (loader->m_native_vertex_format != s_current_vtx_fmt) VertexManager::Flush(); s_current_vtx_fmt = loader->m_native_vertex_format; + // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. + // They still need to go through vertex loading, because we need to calculate a zfreeze refrence slope. 
+ bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5); + DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, - loader->m_native_vtx_decl.stride); + loader->m_native_vtx_decl.stride, cullall); count = loader->RunVertices(primitive, count, src, dst); diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 38cfd19630..0c5ccdd10d 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -12,6 +12,7 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" @@ -25,7 +26,10 @@ u8 *VertexManager::s_pEndBufferPointer; PrimitiveType VertexManager::current_primitive_type; +Slope VertexManager::ZSlope; + bool VertexManager::IsFlushed; +bool VertexManager::CullAll; static const PrimitiveType primitive_from_gx[8] = { PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS @@ -41,6 +45,7 @@ static const PrimitiveType primitive_from_gx[8] = { VertexManager::VertexManager() { IsFlushed = true; + CullAll = false; } VertexManager::~VertexManager() @@ -52,7 +57,7 @@ u32 VertexManager::GetRemainingSize() return (u32)(s_pEndBufferPointer - s_pCurBufferPointer); } -DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride) +DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall) { // The SSE vertex loader can write up to 4 bytes past the end u32 const needed_vertex_bytes = count * stride + 4; @@ -78,6 +83,8 @@ DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 "Increase MAXVBUFFERSIZE or we need primitive breaking after all."); } + CullAll = cullall; + // need to alloc new buffer if (IsFlushed) { @@ -189,45 +196,66 
@@ void VertexManager::Flush() (int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff); #endif - BitSet32 usedtextures; - for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevorders[i / 2].getEnable(i & 1)) - usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true; - - if (bpmem.genMode.numindstages > 0) - for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; - - for (unsigned int i : usedtextures) + // If the primitave is marked CullAll. All we need to do is update the vertex constants and calculate the zfreeze refrence slope + if (!CullAll) { - g_renderer->SetSamplerState(i & 3, i >> 2); - const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i); + BitSet32 usedtextures; + for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevorders[i / 2].getEnable(i & 1)) + usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true; - if (tentry) + if (bpmem.genMode.numindstages > 0) + for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; + + for (unsigned int i : usedtextures) { - // 0s are probably for no manual wrapping needed. - PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0); + g_renderer->SetSamplerState(i & 3, i >> 2); + const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i); + + if (tentry) + { + // 0s are probably for no manual wrapping needed. 
+				PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
+			}
+			else
+				ERROR_LOG(VIDEO, "error loading texture");
 		}
-		else
-			ERROR_LOG(VIDEO, "error loading texture");
 	}
 
-	// set global constants
+	// set global vertex constants
 	VertexShaderManager::SetConstants();
-	GeometryShaderManager::SetConstants();
-	PixelShaderManager::SetConstants();
 
-	bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
-	                   bpmem.dstalpha.enable &&
-	                   bpmem.blendmode.alphaupdate &&
-	                   bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
+	// Calculate ZSlope for zfreeze
+	if (!bpmem.genMode.zfreeze)
+	{
+		// Must be done after VertexShaderManager::SetConstants()
+		CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
+	}
+	else if (ZSlope.dirty && !CullAll) // or apply any dirty ZSlopes
+	{
+		PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0);
+		ZSlope.dirty = false;
+	}
 
-	if (PerfQueryBase::ShouldEmulate())
-		g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
-	g_vertex_manager->vFlush(useDstAlpha);
-	if (PerfQueryBase::ShouldEmulate())
-		g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
+	if (!CullAll)
+	{
+		// set the rest of the global constants
+		GeometryShaderManager::SetConstants();
+		PixelShaderManager::SetConstants();
+
+		bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
+		                   bpmem.dstalpha.enable &&
+		                   bpmem.blendmode.alphaupdate &&
+		                   bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
+
+		if (PerfQueryBase::ShouldEmulate())
+			g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
+		g_vertex_manager->vFlush(useDstAlpha);
+		if (PerfQueryBase::ShouldEmulate())
+			g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
+	}
 
 	GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
 
@@ -235,9 +263,84 @@ void VertexManager::Flush()
 		ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
 
 	IsFlushed = true;
+	CullAll = false;
 }
 
 void VertexManager::DoState(PointerWrap& p)
 {
+	p.Do(ZSlope);
 	g_vertex_manager->vDoState(p);
 }
+
+// Derives the screen-space depth plane (ZSlope) of the last triangle in the vertex buffer.
+// The three most recent vertices are software-transformed to screen space; the result is
+// stored in ZSlope and marked dirty so Flush() can upload it once zfreeze is enabled.
+// ZSlope is left untouched when the buffer holds fewer than three vertices, when a vertex
+// has w == 0, or when the triangle is degenerate.
+void VertexManager::CalculateZSlope(NativeVertexFormat *format)
+{
+	float vtx[9];
+	float out[12];
+	float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
+	                        xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};
+
+	// Global matrix ID.
+	u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
+	// GetVertexDeclaration() returns a const reference; bind to it instead of copying.
+	const PortableVertexDeclaration& vert_decl = format->GetVertexDeclaration();
+	size_t posOff = vert_decl.position.offset;
+	size_t mtxOff = vert_decl.posmtx.offset;
+
+	// Flush() calls this on every non-zfreeze flush, so nothing guarantees three vertices
+	// are present. Reading backwards past the start of the buffer must be avoided.
+	if (static_cast<size_t>(s_pCurBufferPointer - s_pBaseBufferPointer) < static_cast<size_t>(vert_decl.stride) * 3)
+		return;
+
+	// Lookup vertices of the last rendered triangle and software-transform them
+	// This allows us to determine the depth slope, which will be used if z-freeze
+	// is enabled in the following flush.
+	for (unsigned int i = 0; i < 3; ++i)
+	{
+		u8* vtx_ptr = s_pCurBufferPointer - vert_decl.stride * (3 - i);
+		vtx[0 + i * 3] = ((float*)(vtx_ptr + posOff))[0];
+		vtx[1 + i * 3] = ((float*)(vtx_ptr + posOff))[1];
+		vtx[2 + i * 3] = ((float*)(vtx_ptr + posOff))[2];
+
+		// If this vertex format has per-vertex position matrix IDs, look it up.
+		if(vert_decl.posmtx.enable)
+			mtxIdx = *((u32*)(vtx_ptr + mtxOff));
+
+		VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4], mtxIdx);
+
+		// A vertex with w == 0 cannot be projected; keep the previous slope instead of storing inf/NaN.
+		if (out[3 + i * 4] == 0.0f)
+			return;
+
+		// Transform to Screenspace
+		float inv_w = 1.0f / out[3 + i * 4];
+
+		out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
+		out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
+		out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
+	}
+
+	float dx31 = out[8] - out[0];
+	float dx12 = out[0] - out[4];
+	float dy12 = out[1] - out[5];
+	float dy31 = out[9] - out[1];
+
+	float DF31 = out[10] - out[2];
+	float DF21 = out[6] - out[2];
+	float a = DF31 * -dy12 - DF21 * dy31;
+	float b = dx31 * DF21 + dx12 * DF31;
+	float c = -dx12 * dy31 - dx31 * -dy12;
+
+	// Sometimes we process degenerate triangles. Stop any divide by zeros
+	if (c == 0)
+		return;
+
+	ZSlope.dfdx = -a / c;
+	ZSlope.dfdy = -b / c;
+	ZSlope.f0 = out[2] - (out[0] * ZSlope.dfdx + out[1] * ZSlope.dfdy);
+	ZSlope.dirty = true;
+}
diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h
index c854cd3586..4369438bc5 100644
--- a/Source/Core/VideoCommon/VertexManagerBase.h
+++ b/Source/Core/VideoCommon/VertexManagerBase.h
@@ -4,6 +4,7 @@
 #include "Common/CommonFuncs.h"
 #include "Common/CommonTypes.h"
 #include "VideoCommon/DataReader.h"
+#include "VideoCommon/NativeVertexFormat.h"
 
 class NativeVertexFormat;
 class PointerWrap;
@@ -14,6 +15,14 @@ enum PrimitiveType {
 	PRIMITIVE_TRIANGLES,
 };
 
+struct Slope
+{
+	float dfdx;
+	float dfdy;
+	float f0;
+	bool dirty;
+};
+
 class VertexManager
 {
 private:
@@ -32,7 +41,7 @@ public:
 	// needs to be virtual for DX11's dtor
 	virtual ~VertexManager();
 
-	static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride);
+	static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
 	static void FlushData(u32 count, u32 stride);
 
 	static void Flush();
@@
-55,6 +64,11 @@ protected:
 	static u32 GetRemainingSize();
 	static u32 GetRemainingIndices(int primitive);
 
+	static Slope ZSlope;
+	static void CalculateZSlope(NativeVertexFormat *format);
+
+	static bool CullAll;
+
 private:
 	static bool IsFlushed;
 
diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp
index 4ca20a21f4..92c1fffa34 100644
--- a/Source/Core/VideoCommon/VertexShaderManager.cpp
+++ b/Source/Core/VideoCommon/VertexShaderManager.cpp
@@ -167,7 +167,21 @@ static void ViewportCorrectionMatrix(Matrix44& result)
 
 void VertexShaderManager::Init()
 {
-	Dirty();
+	// Initialize state tracking variables
+	nTransformMatricesChanged[0] = -1;
+	nTransformMatricesChanged[1] = -1;
+	nNormalMatricesChanged[0] = -1;
+	nNormalMatricesChanged[1] = -1;
+	nPostTransformMatricesChanged[0] = -1;
+	nPostTransformMatricesChanged[1] = -1;
+	nLightsChanged[0] = -1;
+	nLightsChanged[1] = -1;
+	nMaterialsChanged = BitSet32(0);
+	bTexMatricesChanged[0] = false;
+	bTexMatricesChanged[1] = false;
+	bPosNormalMatrixChanged = false;
+	bProjectionChanged = true;
+	bViewportChanged = false;
 
 	memset(&xfmem, 0, sizeof(xfmem));
 	memset(&constants, 0 , sizeof(constants));
@@ -178,6 +192,8 @@ void VertexShaderManager::Init()
 	memset(g_fProjectionMatrix, 0, sizeof(g_fProjectionMatrix));
 	for (int i = 0; i < 4; ++i)
 		g_fProjectionMatrix[i*5] = 1.0f;
+
+	dirty = true;
 }
 
 void VertexShaderManager::Shutdown()
@@ -186,26 +202,10 @@ void VertexShaderManager::Shutdown()
 
 void VertexShaderManager::Dirty()
 {
-	nTransformMatricesChanged[0] = 0;
-	nTransformMatricesChanged[1] = 256;
-
-	nNormalMatricesChanged[0] = 0;
-	nNormalMatricesChanged[1] = 96;
-
-	nPostTransformMatricesChanged[0] = 0;
-	nPostTransformMatricesChanged[1] = 256;
-
-	nLightsChanged[0] = 0;
-	nLightsChanged[1] = 0x80;
-
-	bPosNormalMatrixChanged = true;
-	bTexMatricesChanged[0] = true;
-	bTexMatricesChanged[1] = true;
-
+	// This function is called after a savestate is loaded.
+	// Any constants that can change based on settings should be re-calculated.
 	bProjectionChanged = true;
 
-	nMaterialsChanged = BitSet32::AllTrue(4);
-
 	dirty = true;
 }
 
@@ -690,6 +690,28 @@ void VertexShaderManager::ResetView()
 	bProjectionChanged = true;
 }
 
+// Software-transforms one model-space position (data[0..2]) to clip space (out[0..3]) using
+// the position matrix selected by MtxIdx and the current g_fProjectionMatrix.
+void VertexShaderManager::TransformToClipSpace(const float* data, float *out, u32 MtxIdx)
+{
+	// Each matrix index addresses one row of 4 floats; the 3x4 position matrix read below spans 3 consecutive rows.
+	const float *world_matrix = (const float *)xfmem.posMatrices + (MtxIdx & 0x3f) * 4;
+	// We use the projection matrix calculated by vertexShaderManager, because it
+	// includes any free look transformations.
+	// Make sure VertexShaderManager::SetConstants() has been called first.
+	const float *proj_matrix = &g_fProjectionMatrix[0];
+
+	float t[3];
+	t[0] = data[0] * world_matrix[0] + data[1] * world_matrix[1] + data[2] * world_matrix[2] + world_matrix[3];
+	t[1] = data[0] * world_matrix[4] + data[1] * world_matrix[5] + data[2] * world_matrix[6] + world_matrix[7];
+	t[2] = data[0] * world_matrix[8] + data[1] * world_matrix[9] + data[2] * world_matrix[10] + world_matrix[11];
+
+	out[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
+	out[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
+	out[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
+	out[3] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15];
+}
+
 void VertexShaderManager::DoState(PointerWrap &p)
 {
 	p.Do(g_fProjectionMatrix);
@@ -698,8 +720,19 @@ void VertexShaderManager::DoState(PointerWrap &p)
 	p.Do(s_viewInvRotationMatrix);
 	p.Do(s_fViewTranslationVector);
 	p.Do(s_fViewRotation);
+
+	p.Do(nTransformMatricesChanged);
+	p.Do(nNormalMatricesChanged);
+	p.Do(nPostTransformMatricesChanged);
+	p.Do(nLightsChanged);
+
+	p.Do(nMaterialsChanged);
+	p.Do(bTexMatricesChanged);
+	p.Do(bPosNormalMatrixChanged);
+	p.Do(bProjectionChanged);
+	p.Do(bViewportChanged);
+
 	p.Do(constants);
-	p.Do(dirty);
 
 	if
(p.GetMode() == PointerWrap::MODE_READ)
 	{
diff --git a/Source/Core/VideoCommon/VertexShaderManager.h b/Source/Core/VideoCommon/VertexShaderManager.h
index d99f07fe21..9689cd8238 100644
--- a/Source/Core/VideoCommon/VertexShaderManager.h
+++ b/Source/Core/VideoCommon/VertexShaderManager.h
@@ -34,6 +34,13 @@ public:
 	static void RotateView(float x, float y);
 	static void ResetView();
 
+	// data: 3 floats representing the X, Y and Z vertex model coordinates.
+	// out: 4 floats which will be initialized with the corresponding clip space coordinates
+	// mtxIdx: the posmatrix index used to look up the position matrix for this vertex.
+	// NOTE: g_fProjectionMatrix must be up to date when this is called
+	//       (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
+	static void TransformToClipSpace(const float* data, float *out, u32 mtxIdx);
+
 	static VertexShaderConstants constants;
 	static bool dirty;
 };