From c4f85a38e630c80feb1deb9205d8bb3ff5b7caec Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Tue, 5 May 2015 23:34:45 +0200 Subject: [PATCH 01/11] VideoBackends: Use proper floating point depth precision. --- Source/Core/VideoBackends/D3D/PixelShaderCache.cpp | 4 ++-- Source/Core/VideoBackends/D3D/Render.cpp | 4 ++-- Source/Core/VideoBackends/OGL/TextureCache.cpp | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 4 ++-- Source/Core/VideoCommon/TextureConversionShader.cpp | 10 +++++----- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 3e2c6292c0..4a2def325c 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -146,7 +146,7 @@ const char depth_matrix_program[] = { " in float4 pos : SV_Position,\n" " in float3 uv0 : TEXCOORD0){\n" " float4 texcol = Tex0.Sample(samp0,uv0);\n" - " int depth = int(round(texcol.x * float(0xFFFFFF)));\n" + " int depth = int(round(texcol.x * 16777216.0));\n" // Convert to Z24 format " int4 workspace;\n" @@ -180,7 +180,7 @@ const char depth_matrix_program_msaa[] = { " for(int i = 0; i < SAMPLES; ++i)\n" " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" " texcol /= SAMPLES;\n" - " int depth = int(round(texcol.x * float(0xFFFFFF)));\n" + " int depth = int(round(texcol.x * 16777216.0));\n" // Convert to Z24 format " int4 workspace;\n" diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index ec73313476..67046ecf91 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -419,11 +419,11 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) { // if Z is in 16 bit format you must return a 16 bit integer - ret = ((u32)(val * 0xffff)); + ret = ((u32)(val * 
65536.0f)); } else { - ret = ((u32)(val * 0xffffff)); + ret = ((u32)(val * 16777216.0f)); } D3D::context->Unmap(read_tex, 0); diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index abc1a73ba9..31979b8347 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -294,7 +294,7 @@ void TextureCache::CompileShaders() "\n" "void main(){\n" " vec4 texcol = texture(samp9, vec3(f_uv0.xy, %s));\n" - " int depth = int(round(texcol.x * float(0xFFFFFF)));\n" + " int depth = int(round(texcol.x * 16777216.0));\n" // Convert to Z24 format " ivec4 workspace;\n" diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 4873afc567..5581aa1f1d 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -1224,11 +1224,11 @@ static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, A if (ApiType == API_OPENGL) out.Write("\tscreenpos.y = %i - screenpos.y;\n", EFB_HEIGHT); - out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xFFFFFF);\n"); + out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / 16777216.0;\n"); } else { - out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n"); + out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); } } diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 99559db08b..18e5c258c0 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -487,7 +487,7 @@ static void WriteZ16Encoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 0, ApiType); - WRITE(p, " depth *= 16777215.0;\n"); + WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 
256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -497,7 +497,7 @@ static void WriteZ16Encoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 1, ApiType); - WRITE(p, " depth *= 16777215.0;\n"); + WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -519,7 +519,7 @@ static void WriteZ16LEncoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 0, ApiType); - WRITE(p, " depth *= 16777215.0;\n"); + WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -531,7 +531,7 @@ static void WriteZ16LEncoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 1, ApiType); - WRITE(p, " depth *= 16777215.0;\n"); + WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -558,7 +558,7 @@ static void WriteZ24Encoder(char*& p, API_TYPE ApiType) for (int i = 0; i < 2; i++) { - WRITE(p, " depth%i *= 16777215.0;\n", i); + WRITE(p, " depth%i *= 16777216.0;\n", i); WRITE(p, " expanded%i.r = floor(depth%i / (256.0 * 256.0));\n", i, i); WRITE(p, " depth%i -= expanded%i.r * 256.0 * 256.0;\n", i, i); From 4b2e04b862ef0e5166f1c855148a44bc7c88f381 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Wed, 6 May 2015 21:31:05 +0200 Subject: [PATCH 02/11] OGL: Change the depth buffer type to GL_FLOAT. 
--- .../VideoBackends/OGL/FramebufferManager.cpp | 4 +-- Source/Core/VideoBackends/OGL/Render.cpp | 31 ++++++++++--------- Source/Core/VideoBackends/OGL/Render.h | 2 +- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 23c0600819..13e2dc738b 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -89,7 +89,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(m_textureType, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(m_textureType, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage3D(m_textureType, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr); + glTexImage3D(m_textureType, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); glBindTexture(m_textureType, m_efbColorSwap); glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); @@ -150,7 +150,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glTexParameteri(resolvedType, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(resolvedType, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(resolvedType, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage3D(resolvedType, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr); + glTexImage3D(resolvedType, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); // Bind resolved textures to resolved framebuffer. 
glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index a1f63c6d32..638192df44 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -914,7 +914,7 @@ void ClearEFBCache() } } -void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const u32* data) +void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const void* data) { u32 cacheType = (type == PEEK_Z ? 0 : 1); @@ -936,7 +936,18 @@ void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRec u32 xEFB = efbPixelRc.left + xCache; u32 xPixel = (EFBToScaledX(xEFB) + EFBToScaledX(xEFB + 1)) / 2; u32 xData = xPixel - targetPixelRc.left; - s_efbCache[cacheType][cacheRectIdx][yCache * EFB_CACHE_RECT_SIZE + xCache] = data[yData * targetPixelRcWidth + xData]; + u32 value; + if (type == PEEK_Z) + { + float* ptr = (float*)data; + value = (u32)(ptr[yData * targetPixelRcWidth + xData] * 16777216.0f); + } + else + { + u32* ptr = (u32*)data; + value = ptr[yData * targetPixelRcWidth + xData]; + } + s_efbCache[cacheType][cacheRectIdx][yCache * EFB_CACHE_RECT_SIZE + xCache] = value; } } @@ -1005,10 +1016,10 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) g_renderer->RestoreAPIState(); } - u32* depthMap = new u32[targetPixelRcWidth * targetPixelRcHeight]; + float* depthMap = new float[targetPixelRcWidth * targetPixelRcHeight]; glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, targetPixelRcHeight, - GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, depthMap); + GL_DEPTH_COMPONENT, GL_FLOAT, depthMap); UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, depthMap); @@ -1019,18 +1030,10 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 
poke_data) u32 yRect = y % EFB_CACHE_RECT_SIZE; z = s_efbCache[0][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect]; - // Scale the 32-bit value returned by glReadPixels to a 24-bit - // value (GC uses a 24-bit Z-buffer). - // TODO: in RE0 this value is often off by one, which causes lighting to disappear + // if Z is in 16 bit format you must return a 16 bit integer if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - z = z >> 16; - } - else - { z = z >> 8; - } + return z; } diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index 834f73e3a3..9d4951b54c 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -91,7 +91,7 @@ public: int GetMaxTextureSize() override; private: - void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const u32* data); + void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const void* data); void BlitScreen(TargetRectangle src, TargetRectangle dst, GLuint src_texture, int src_width, int src_height); }; From be810eb75038bce43054d9a4443d59c5ddf0be83 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Wed, 6 May 2015 21:42:23 +0200 Subject: [PATCH 03/11] OGL: Switch depth buffers to GL_DEPTH_COMPONENT32F format. 
--- Source/Core/VideoBackends/OGL/FramebufferManager.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 13e2dc738b..3eea0dcd36 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -89,7 +89,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(m_textureType, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(m_textureType, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage3D(m_textureType, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); + glTexImage3D(m_textureType, 0, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); glBindTexture(m_textureType, m_efbColorSwap); glTexParameteri(m_textureType, GL_TEXTURE_MAX_LEVEL, 0); @@ -111,7 +111,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, false); glBindTexture(m_textureType, m_efbDepth); - glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, false); + glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, m_EFBLayers, false); glBindTexture(m_textureType, m_efbColorSwap); glTexImage3DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, m_targetHeight, m_EFBLayers, false); @@ -125,7 +125,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, m_targetHeight, false); glBindTexture(m_textureType, m_efbDepth); 
- glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, false); + glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, false); glBindTexture(m_textureType, m_efbColorSwap); glTexImage2DMultisample(m_textureType, m_msaaSamples, GL_RGBA, m_targetWidth, m_targetHeight, false); @@ -150,7 +150,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glTexParameteri(resolvedType, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(resolvedType, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(resolvedType, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage3D(resolvedType, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); + glTexImage3D(resolvedType, 0, GL_DEPTH_COMPONENT32F, m_targetWidth, m_targetHeight, m_EFBLayers, 0, GL_DEPTH_COMPONENT, GL_FLOAT, nullptr); // Bind resolved textures to resolved framebuffer. glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer); From b0770e2a0c2e391139db1365ca3658ada0f87da0 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Wed, 6 May 2015 22:02:12 +0200 Subject: [PATCH 04/11] VideoBackends: Floor depth values in depth copy shaders. 
--- Source/Core/VideoBackends/D3D/PixelShaderCache.cpp | 4 ++-- Source/Core/VideoBackends/OGL/TextureCache.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index 4a2def325c..b6bbff313b 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -146,7 +146,7 @@ const char depth_matrix_program[] = { " in float4 pos : SV_Position,\n" " in float3 uv0 : TEXCOORD0){\n" " float4 texcol = Tex0.Sample(samp0,uv0);\n" - " int depth = int(round(texcol.x * 16777216.0));\n" + " int depth = int(floor(texcol.x * 16777216.0));\n" // Convert to Z24 format " int4 workspace;\n" @@ -180,7 +180,7 @@ const char depth_matrix_program_msaa[] = { " for(int i = 0; i < SAMPLES; ++i)\n" " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" " texcol /= SAMPLES;\n" - " int depth = int(round(texcol.x * 16777216.0));\n" + " int depth = int(floor(texcol.x * 16777216.0));\n" // Convert to Z24 format " int4 workspace;\n" diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 31979b8347..a904ca64af 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -294,7 +294,7 @@ void TextureCache::CompileShaders() "\n" "void main(){\n" " vec4 texcol = texture(samp9, vec3(f_uv0.xy, %s));\n" - " int depth = int(round(texcol.x * 16777216.0));\n" + " int depth = int(floor(texcol.x * 16777216.0));\n" // Convert to Z24 format " ivec4 workspace;\n" From 0f2c72f0f844c219e168faa7de8dd515f8723fdc Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Wed, 6 May 2015 22:42:06 +0200 Subject: [PATCH 05/11] VideoCommon: Clamp integer conversions. 
--- Source/Core/VideoBackends/D3D/PixelShaderCache.cpp | 4 ++-- Source/Core/VideoBackends/OGL/TextureCache.cpp | 2 +- Source/Core/VideoCommon/TextureConversionShader.cpp | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index b6bbff313b..eef2fdbe7a 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -146,7 +146,7 @@ const char depth_matrix_program[] = { " in float4 pos : SV_Position,\n" " in float3 uv0 : TEXCOORD0){\n" " float4 texcol = Tex0.Sample(samp0,uv0);\n" - " int depth = int(floor(texcol.x * 16777216.0));\n" + " int depth = clamp(int(texcol.x * 16777216.0), 0, 0xFFFFFF);\n" // Convert to Z24 format " int4 workspace;\n" @@ -180,7 +180,7 @@ const char depth_matrix_program_msaa[] = { " for(int i = 0; i < SAMPLES; ++i)\n" " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" " texcol /= SAMPLES;\n" - " int depth = int(floor(texcol.x * 16777216.0));\n" + " int depth = clamp(int(texcol.x * 16777216.0), 0, 0xFFFFFF);\n" // Convert to Z24 format " int4 workspace;\n" diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index a904ca64af..177d33432d 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -294,7 +294,7 @@ void TextureCache::CompileShaders() "\n" "void main(){\n" " vec4 texcol = texture(samp9, vec3(f_uv0.xy, %s));\n" - " int depth = int(floor(texcol.x * 16777216.0));\n" + " int depth = clamp(int(texcol.x * 16777216.0), 0, 0xFFFFFF);\n" // Convert to Z24 format " ivec4 workspace;\n" diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 18e5c258c0..844710c3ee 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ 
b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -487,7 +487,7 @@ static void WriteZ16Encoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 0, ApiType); - WRITE(p, " depth *= 16777216.0;\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -497,7 +497,7 @@ static void WriteZ16Encoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 1, ApiType); - WRITE(p, " depth *= 16777216.0;\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -519,7 +519,7 @@ static void WriteZ16LEncoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 0, ApiType); - WRITE(p, " depth *= 16777216.0;\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -531,7 +531,7 @@ static void WriteZ16LEncoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 1, ApiType); - WRITE(p, " depth *= 16777216.0;\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -558,7 +558,7 @@ static void WriteZ24Encoder(char*& p, API_TYPE ApiType) for (int i = 0; i < 2; i++) { - WRITE(p, " depth%i *= 16777216.0;\n", i); + WRITE(p, " depth%i = clamp(depth%i * 16777216.0, 0, 0xFFFFFF);\n", i, i); WRITE(p, " expanded%i.r = floor(depth%i / (256.0 * 256.0));\n", i, i); WRITE(p, " depth%i -= expanded%i.r * 256.0 * 256.0;\n", i, i); From 
1a409a2e16b46d88e3dbd6e0b01eb2145050383b Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Thu, 7 May 2015 00:37:58 +0200 Subject: [PATCH 06/11] VideoBackends: Clamp Z peek values. --- Source/Core/VideoBackends/D3D/Render.cpp | 5 +++-- Source/Core/VideoBackends/OGL/Render.cpp | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 67046ecf91..1ce0d1191e 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -8,6 +8,7 @@ #include #include +#include "Common/MathUtil.h" #include "Common/Timer.h" #include "Core/ConfigManager.h" @@ -419,11 +420,11 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) { // if Z is in 16 bit format you must return a 16 bit integer - ret = ((u32)(val * 65536.0f)); + ret = MathUtil::Clamp((u32)(val * 65536.0f), 0, 0xFFFF); } else { - ret = ((u32)(val * 16777216.0f)); + ret = MathUtil::Clamp((u32)(val * 16777216.0f), 0, 0xFFFFFF); } D3D::context->Unmap(read_tex, 0); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 638192df44..50a166e2da 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -12,6 +12,7 @@ #include "Common/Atomic.h" #include "Common/CommonPaths.h" #include "Common/FileUtil.h" +#include "Common/MathUtil.h" #include "Common/StringUtil.h" #include "Common/Thread.h" #include "Common/Timer.h" @@ -940,7 +941,7 @@ void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRec if (type == PEEK_Z) { float* ptr = (float*)data; - value = (u32)(ptr[yData * targetPixelRcWidth + xData] * 16777216.0f); + value = MathUtil::Clamp((u32)(ptr[yData * targetPixelRcWidth + xData] * 16777216.0f), 0, 0xFFFFFF); } else { From 84a5f4abb07d01fdc32ff6cf96e2a4d682b07e06 Mon Sep 17 00:00:00 2001 From: Jules 
Blok Date: Thu, 7 May 2015 23:29:16 +0200 Subject: [PATCH 07/11] VideoBackends: Use the new divisor when clearing the depth buffer. --- Source/Core/VideoBackends/D3D/Render.cpp | 2 +- Source/Core/VideoBackends/OGL/Render.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 1ce0d1191e..f11749d868 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -549,7 +549,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE // Color is passed in bgra mode so we need to convert it to rgba u32 rgbaColor = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000); - D3D::drawClearQuad(rgbaColor, (z & 0xFFFFFF) / float(0xFFFFFF)); + D3D::drawClearQuad(rgbaColor, (z & 0xFFFFFF) / 16777216.0f); D3D::stateman->PopDepthState(); D3D::stateman->PopBlendState(); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 50a166e2da..1c0e3df4c2 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -1139,7 +1139,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) ResetAPIState(); glDepthMask(GL_TRUE); - glClearDepthf(float(poke_data & 0xFFFFFF) / float(0xFFFFFF)); + glClearDepthf(float(poke_data & 0xFFFFFF) / 16777216.0f); glEnable(GL_SCISSOR_TEST); glScissor(targetPixelRc.left, targetPixelRc.bottom, targetPixelRc.GetWidth(), targetPixelRc.GetHeight()); @@ -1273,7 +1273,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE // depth glDepthMask(zEnable ? 
GL_TRUE : GL_FALSE); - glClearDepthf(float(z & 0xFFFFFF) / float(0xFFFFFF)); + glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); // Update rect for clearing the picture glEnable(GL_SCISSOR_TEST); From a224c604a37d712364cbb0a876ce12e0653855c2 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Thu, 7 May 2015 23:33:21 +0200 Subject: [PATCH 08/11] D3D: Use a 32-bit floating point depth buffer. --- Source/Core/VideoBackends/D3D/FramebufferManager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D/FramebufferManager.cpp index f6751b085f..d3791c4a11 100644 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D/FramebufferManager.cpp @@ -93,10 +93,10 @@ FramebufferManager::FramebufferManager() D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.color_staging_buf, "EFB color staging texture (used for Renderer::AccessEFB)"); // EFB depth buffer - primary depth buffer - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R24G8_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE, D3D11_USAGE_DEFAULT, 0, sample_desc.Count, sample_desc.Quality); + texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE, D3D11_USAGE_DEFAULT, 0, sample_desc.Count, sample_desc.Quality); hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); CHECK(hr==S_OK, "create EFB depth texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); - m_efb.depth_tex = new D3DTexture2D(buf, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL|D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1)); + m_efb.depth_tex = new D3DTexture2D(buf, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL|D3D11_BIND_SHADER_RESOURCE), 
DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1)); SAFE_RELEASE(buf); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.depth_tex->GetTex(), "EFB depth texture"); D3D::SetDebugObjectName((ID3D11DeviceChild*)m_efb.depth_tex->GetDSV(), "EFB depth texture depth stencil view"); From 5dbb43ae1d8a7cd1550ee55ad6e5afacbabad483 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Thu, 7 May 2015 23:49:09 +0200 Subject: [PATCH 09/11] PixelShaderGen: Use new multiplier everywhere and directly cast to int instead of rounding. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 5581aa1f1d..975b8e7f71 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -563,12 +563,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // The performance impact of this additional calculation doesn't matter, but it prevents // the host GPU driver from performing any early depth test optimizations. if (g_ActiveConfig.bFastDepthCalc) - out.Write("\tint zCoord = iround(rawpos.z * float(0xFFFFFF));\n"); + out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); else { out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); // the screen space depth value = far z + (clip z / clip w) * z range - out.Write("\tint zCoord = " I_ZBIAS"[1].x + iround((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y));\n"); + out.Write("\tint zCoord = " I_ZBIAS"[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y));\n"); } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either @@ -1169,13 +1169,13 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data) // TODO: Verify that we want to drop lower bits here!
(currently taken over from software renderer) // Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead? // That's equivalent, but keeps the lower bits of Zs. - out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777215.0) / float(" I_FOGI".y - (zCoord >> " I_FOGI".w));\n"); + out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777216.0) / float(" I_FOGI".y - (zCoord >> " I_FOGI".w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - out.Write("\tfloat ze = " I_FOGF"[1].x * float(zCoord) / 16777215.0;\n"); + out.Write("\tfloat ze = " I_FOGF"[1].x * float(zCoord) / 16777216.0;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k From d04af15ad442d410ebc9d2ba348fea062eb89d92 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Thu, 7 May 2015 23:50:14 +0200 Subject: [PATCH 10/11] TextureConversionShader: Use floating point values in clamp(). --- Source/Core/VideoCommon/TextureConversionShader.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 844710c3ee..9d8c4eab5c 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -487,7 +487,7 @@ static void WriteZ16Encoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 0, ApiType); - WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0.0, float(0xFFFFFF));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -497,7 +497,7 @@ static void WriteZ16Encoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 1, ApiType); - WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0.0, float(0xFFFFFF));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); 
WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -519,7 +519,7 @@ static void WriteZ16LEncoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 0, ApiType); - WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0.0, float(0xFFFFFF));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -531,7 +531,7 @@ static void WriteZ16LEncoder(char*& p,API_TYPE ApiType) WriteSampleColor(p, "r", "depth", 1, ApiType); - WRITE(p, " depth = clamp(depth * 16777216.0, 0, 0xFFFFFF);\n"); + WRITE(p, " depth = clamp(depth * 16777216.0, 0.0, float(0xFFFFFF));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n"); WRITE(p, " expanded.g = floor(depth / 256.0);\n"); @@ -558,7 +558,7 @@ static void WriteZ24Encoder(char*& p, API_TYPE ApiType) for (int i = 0; i < 2; i++) { - WRITE(p, " depth%i = clamp(depth%i * 16777216.0, 0, 0xFFFFFF);\n", i, i); + WRITE(p, " depth%i = clamp(depth%i * 16777216.0, 0.0, float(0xFFFFFF));\n", i, i); WRITE(p, " expanded%i.r = floor(depth%i / (256.0 * 256.0));\n", i, i); WRITE(p, " depth%i -= expanded%i.r * 256.0 * 256.0;\n", i, i); From 1d745d632ab9f4774bc38782cd688a5fa48bc097 Mon Sep 17 00:00:00 2001 From: Jules Blok Date: Fri, 8 May 2015 14:26:48 +0200 Subject: [PATCH 11/11] PixelShaderGen: Clamp zCoord to the depth range. 
--- Source/Core/VideoCommon/PixelShaderGen.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 975b8e7f71..562cb9be07 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -570,6 +570,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // the screen space depth value = far z + (clip z / clip w) * z range out.Write("\tint zCoord = " I_ZBIAS"[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y));\n"); } + out.Write("\tzCoord = clamp(zCoord, " I_ZBIAS"[1].x - " I_ZBIAS"[1].y, " I_ZBIAS"[1].x);\n"); // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel;