From 63264ac23f3b9cfaa725cf90d995497ba1601449 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 28 Nov 2015 21:33:47 +1000 Subject: [PATCH] D3D: Fix EFB depth buffer copies, filtering on scaled EFB copies when MSAA is enabled, real XFB filtering Since ResolveSubresource cannot be used with depth textures (and throws an error with the debug layer enabled), use a shader which selects the minimum depth value from all samples. Change the sampler used by XFBEncoder to use a linear filter, rather than a point filter, to match GL behavior. --- .../VideoBackends/D3D/FramebufferManager.cpp | 46 ++++++++++++++++--- .../VideoBackends/D3D/FramebufferManager.h | 1 + .../VideoBackends/D3D/PSTextureEncoder.cpp | 15 +++--- .../VideoBackends/D3D/PixelShaderCache.cpp | 32 +++++++++++++ .../Core/VideoBackends/D3D/PixelShaderCache.h | 1 + .../Core/VideoBackends/D3D/TextureCache.cpp | 19 +++++++- Source/Core/VideoBackends/D3D/XFBEncoder.cpp | 3 +- 7 files changed, 99 insertions(+), 18 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D/FramebufferManager.cpp index 8ba8074d40..ae7ca4b973 100644 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D/FramebufferManager.cpp @@ -4,6 +4,7 @@ #include "Core/HW/Memmap.h" #include "VideoBackends/D3D/D3DBase.h" +#include "VideoBackends/D3D/D3DState.h" #include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/FramebufferManager.h" #include "VideoBackends/D3D/GeometryShaderCache.h" @@ -20,6 +21,7 @@ static XFBEncoder s_xfbEncoder; FramebufferManager::Efb FramebufferManager::m_efb; unsigned int FramebufferManager::m_target_width; unsigned int FramebufferManager::m_target_height; +ID3D11DepthStencilState* FramebufferManager::m_depth_resolve_depth_state; D3DTexture2D* &FramebufferManager::GetEFBColorTexture() { return m_efb.color_tex; } ID3D11Texture2D* &FramebufferManager::GetEFBColorStagingBuffer() { return m_efb.color_staging_buf; } @@ -44,8 +46,29 @@ 
D3DTexture2D* &FramebufferManager::GetResolvedEFBDepthTexture() { if (g_ActiveConfig.iMultisampleMode) { - for (int i = 0; i < m_efb.slices; i++) - D3D::context->ResolveSubresource(m_efb.resolved_depth_tex->GetTex(), D3D11CalcSubresource(0, i, 1), m_efb.depth_tex->GetTex(), D3D11CalcSubresource(0, i, 1), DXGI_FORMAT_R24_UNORM_X8_TYPELESS); + // ResolveSubresource does not work with depth textures. + // Instead, we use a shader that selects the minimum depth from all samples. + + // Clear render state, and enable depth writes. + g_renderer->ResetAPIState(); + D3D::stateman->PushDepthState(m_depth_resolve_depth_state); + + // Set up to render to resolved depth texture. + const D3D11_VIEWPORT viewport = CD3D11_VIEWPORT(0.f, 0.f, (float)m_target_width, (float)m_target_height); + D3D::context->RSSetViewports(1, &viewport); + D3D::context->OMSetRenderTargets(0, nullptr, m_efb.resolved_depth_tex->GetDSV()); + + // Render a quad covering the entire target, writing SV_Depth. + const D3D11_RECT source_rect = CD3D11_RECT(0, 0, m_target_width, m_target_height); + D3D::drawShadedTexQuad(m_efb.depth_tex->GetSRV(), &source_rect, m_target_width, m_target_height, + PixelShaderCache::GetDepthResolveProgram(), VertexShaderCache::GetSimpleVertexShader(), + VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); + + // Restore render state. 
+ D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); + D3D::stateman->PopDepthState(); + g_renderer->RestoreAPIState(); + return m_efb.resolved_depth_tex; } else @@ -77,7 +100,6 @@ FramebufferManager::FramebufferManager() hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); CHECK(hr==S_OK, "create EFB color texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); m_efb.color_tex = new D3DTexture2D(buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE|D3D11_BIND_RENDER_TARGET), DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1)); - CHECK(m_efb.color_tex!=nullptr, "create EFB color texture (size: %dx%d)", m_target_width, m_target_height); SAFE_RELEASE(buf); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.color_tex->GetTex(), "EFB color texture"); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.color_tex->GetSRV(), "EFB color texture shader resource view"); @@ -88,7 +110,6 @@ FramebufferManager::FramebufferManager() hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); CHECK(hr==S_OK, "create EFB color temp texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); m_efb.color_temp_tex = new D3DTexture2D(buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE|D3D11_BIND_RENDER_TARGET), DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1)); - CHECK(m_efb.color_temp_tex!=nullptr, "create EFB color temp texture (size: %dx%d)", m_target_width, m_target_height); SAFE_RELEASE(buf); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.color_temp_tex->GetTex(), "EFB color temp texture"); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.color_temp_tex->GetSRV(), "EFB color temp texture shader resource view"); @@ -130,24 +151,34 @@ FramebufferManager::FramebufferManager() // Framebuffer resolve textures (color+depth) texdesc = 
CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE, D3D11_USAGE_DEFAULT, 0, 1); hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); + CHECK(hr==S_OK, "create EFB color resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); m_efb.resolved_color_tex = new D3DTexture2D(buf, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM); - CHECK(m_efb.resolved_color_tex!=nullptr, "create EFB color resolve texture (size: %dx%d)", m_target_width, m_target_height); SAFE_RELEASE(buf); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.resolved_color_tex->GetTex(), "EFB color resolve texture"); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.resolved_color_tex->GetSRV(), "EFB color resolve texture shader resource view"); - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R24G8_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE); + texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R24G8_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_DEPTH_STENCIL); hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); CHECK(hr==S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); - m_efb.resolved_depth_tex = new D3DTexture2D(buf, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_R24_UNORM_X8_TYPELESS); + m_efb.resolved_depth_tex = new D3DTexture2D(buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_DEPTH_STENCIL), DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT); SAFE_RELEASE(buf); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.resolved_depth_tex->GetTex(), "EFB depth resolve texture"); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.resolved_depth_tex->GetSRV(), "EFB depth resolve texture shader resource view"); + + // Depth state used when writing resolved depth texture + D3D11_DEPTH_STENCIL_DESC depth_resolve_depth_state = 
CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); + depth_resolve_depth_state.DepthEnable = TRUE; + depth_resolve_depth_state.DepthFunc = D3D11_COMPARISON_ALWAYS; + depth_resolve_depth_state.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; + hr = D3D::device->CreateDepthStencilState(&depth_resolve_depth_state, &m_depth_resolve_depth_state); + CHECK(hr == S_OK, "create depth resolve depth stencil state"); + D3D::SetDebugObjectName((ID3D11DeviceChild*)m_depth_resolve_depth_state, "depth resolve depth stencil state"); } else { m_efb.resolved_color_tex = nullptr; m_efb.resolved_depth_tex = nullptr; + m_depth_resolve_depth_state = nullptr; } s_xfbEncoder.Init(); @@ -165,6 +196,7 @@ FramebufferManager::~FramebufferManager() SAFE_RELEASE(m_efb.depth_staging_buf); SAFE_RELEASE(m_efb.depth_read_texture); SAFE_RELEASE(m_efb.resolved_depth_tex); + SAFE_RELEASE(m_depth_resolve_depth_state); } void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,float Gamma) diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.h b/Source/Core/VideoBackends/D3D/FramebufferManager.h index 9151c7ef56..264cc37028 100644 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.h +++ b/Source/Core/VideoBackends/D3D/FramebufferManager.h @@ -104,6 +104,7 @@ private: static unsigned int m_target_width; static unsigned int m_target_height; + static ID3D11DepthStencilState* m_depth_resolve_depth_state; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 68a7a84d6a..5698077817 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -96,6 +96,14 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p HRESULT hr; + // Resolve MSAA targets before copying. + ID3D11ShaderResourceView* pEFB = (srcFormat == PEControl::Z24) ? 
+ FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : + // FIXME: Instead of resolving EFB, it would be better to pick out a + // single sample from each pixel. The game may break if it isn't + // expecting the blurred edges around multisampled shapes. + FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); + // Reset API g_renderer->ResetAPIState(); @@ -111,13 +119,6 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr); - ID3D11ShaderResourceView* pEFB = (srcFormat == PEControl::Z24) ? - FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : - // FIXME: Instead of resolving EFB, it would be better to pick out a - // single sample from each pixel. The game may break if it isn't - // expecting the blurred edges around multisampled shapes. - FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - EFBEncodeParams params; params.SrcLeft = srcRect.left; params.SrcTop = srcRect.top; diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index d1013c5539..07edb8fe8b 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -36,6 +36,7 @@ ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr}; ID3D11PixelShader* s_DepthMatrixProgram[2] = {nullptr}; ID3D11PixelShader* s_ClearProgram = nullptr; ID3D11PixelShader* s_AnaglyphProgram = nullptr; +ID3D11PixelShader* s_DepthResolveProgram = nullptr; ID3D11PixelShader* s_rgba6_to_rgb8[2] = {nullptr}; ID3D11PixelShader* s_rgb8_to_rgba6[2] = {nullptr}; ID3D11Buffer* pscbuf = nullptr; @@ -199,6 +200,22 @@ const char depth_matrix_program_msaa[] = { "}\n" }; +const char depth_resolve_program[] = { + "#define SAMPLES %d\n" + "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n" + "void main(\n" + " out float depth : SV_Depth,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0)\n" + "{\n" + " int 
width, height, slices, samples;\n" + " Tex0.GetDimensions(width, height, slices, samples);\n" + " depth = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" + " for(int i = 1; i < SAMPLES; ++i)\n" + " depth = min(depth, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" + "}\n" +}; + const char reint_rgba6_to_rgb8[] = { "sampler samp0 : register(s0);\n" "Texture2DArray Tex0 : register(t0);\n" @@ -406,6 +423,19 @@ ID3D11PixelShader* PixelShaderCache::GetAnaglyphProgram() return s_AnaglyphProgram; } +ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram() +{ + if (s_DepthResolveProgram != nullptr) + return s_DepthResolveProgram; + + // create MSAA shader for current AA mode + std::string buf = StringFromFormat(depth_resolve_program, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count); + s_DepthResolveProgram = D3D::CompileAndCreatePixelShader(buf); + CHECK(s_DepthResolveProgram != nullptr, "Create depth matrix MSAA pixel shader"); + D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthResolveProgram, "depth resolve pixel shader"); + return s_DepthResolveProgram; +} + ID3D11Buffer* &PixelShaderCache::GetConstantBuffer() { // TODO: divide the global variables of the generated shaders into about 5 constant buffers to speed this up @@ -503,6 +533,7 @@ void PixelShaderCache::InvalidateMSAAShaders() SAFE_RELEASE(s_DepthMatrixProgram[1]); SAFE_RELEASE(s_rgb8_to_rgba6[1]); SAFE_RELEASE(s_rgba6_to_rgb8[1]); + SAFE_RELEASE(s_DepthResolveProgram); } void PixelShaderCache::Shutdown() @@ -511,6 +542,7 @@ void PixelShaderCache::Shutdown() SAFE_RELEASE(s_ClearProgram); SAFE_RELEASE(s_AnaglyphProgram); + SAFE_RELEASE(s_DepthResolveProgram); for (int i = 0; i < 2; ++i) { SAFE_RELEASE(s_ColorCopyProgram[i]); diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h index f63956a92f..0e8c071caa 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ 
b/Source/Core/VideoBackends/D3D/PixelShaderCache.h @@ -31,6 +31,7 @@ public: static ID3D11PixelShader* GetDepthMatrixProgram(bool multisampled); static ID3D11PixelShader* GetClearProgram(); static ID3D11PixelShader* GetAnaglyphProgram(); + static ID3D11PixelShader* GetDepthResolveProgram(); static ID3D11PixelShader* ReinterpRGBA6ToRGB8(bool multisampled); static ID3D11PixelShader* ReinterpRGB8ToRGBA6(bool multisampled); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index 0de91068c7..49f60feeb5 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -188,6 +188,21 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool scaleByHalf, unsigned int cbufid, const float *colmat) { + // When copying at half size, in multisampled mode, resolve the color/depth buffer first. + // This is because multisampled texture reads go through Load, not Sample, and the linear + // filter is ignored. + bool multisampled = (g_ActiveConfig.iMultisampleMode != 0); + ID3D11ShaderResourceView* efbTexSRV = (srcFormat == PEControl::Z24) ? + FramebufferManager::GetEFBDepthTexture()->GetSRV() : + FramebufferManager::GetEFBColorTexture()->GetSRV(); + if (multisampled && scaleByHalf) + { + multisampled = false; + efbTexSRV = (srcFormat == PEControl::Z24) ? + FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : + FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); + } + g_renderer->ResetAPIState(); // stretch picture with increased internal resolution @@ -224,10 +239,10 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat // Create texture copy D3D::drawShadedTexQuad( - (srcFormat == PEControl::Z24 ? 
FramebufferManager::GetEFBDepthTexture() : FramebufferManager::GetEFBColorTexture())->GetSRV(), + efbTexSRV, &sourcerect, Renderer::GetTargetWidth(), Renderer::GetTargetHeight(), - srcFormat == PEControl::Z24 ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true), + srcFormat == PEControl::Z24 ? PixelShaderCache::GetDepthMatrixProgram(multisampled) : PixelShaderCache::GetColorMatrixProgram(multisampled), VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); diff --git a/Source/Core/VideoBackends/D3D/XFBEncoder.cpp b/Source/Core/VideoBackends/D3D/XFBEncoder.cpp index 560b13fa7a..a4fb947d5e 100644 --- a/Source/Core/VideoBackends/D3D/XFBEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/XFBEncoder.cpp @@ -247,8 +247,7 @@ void XFBEncoder::Init() // Create EFB texture sampler D3D11_SAMPLER_DESC sd = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - // FIXME: Should we really use point sampling here? - sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + sd.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; hr = D3D::device->CreateSamplerState(&sd, &m_efbSampler); CHECK(SUCCEEDED(hr), "create xfb encode texture sampler"); D3D::SetDebugObjectName(m_efbSampler, "xfb encoder texture sampler");